[8790ab]: / demo / run_magma.ipynb

Download this file

1202 lines (1201 with data), 50.1 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run MAGMA on KGWAS Sumstats to get gene-based p-values\n",
    "Given a local PLINK bfile that stores the genotype data, you can run MAGMA to get gene-based p-values. Here is example code to run it, assuming you have (1) saved a trained model under `./data/model/test`, (2) downloaded the MAGMA executable from [here](https://cncr.nl/research/magma/), and (3) a genotype file for your cohort."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All required data files are present.\n",
      "--loading KG---\n",
      "--using enformer SNP embedding--\n",
      "--using random go embedding--\n",
      "--using ESM gene embedding--\n",
      "Loading example GWAS file...\n",
      "Example file already exists locally.\n",
      "Loading GWAS file from ./data/biochemistry_Creatinine_fastgwa_full_10000_1.fastGWA...\n",
      "Using ldsc weight...\n",
      "ldsc_weight mean:  0.9999999999999993\n",
      "Annotation file already exists locally.\n",
      "MAGMA command executed successfully.\n",
      "Output: Welcome to MAGMA v1.10 (linux)\n",
      "Using flags:\n",
      "\t--bfile /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1\n",
      "\t--gene-annot ./data/gene_annotation.genes.annot\n",
      "\t--pval ./data//model_pred/new_experiments/test_magma_format.csv N=9988\n",
      "\t--out ./data//model_pred/new_experiments/test_magma_out\n",
      "\n",
      "Start time is 21:07:31, Monday 25 Nov 2024\n",
      "\n",
      "Loading PLINK-format data...\n",
      "Reading file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.fam... 48769 individuals read\n",
      "Reading file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.bim... 784256 SNPs read\n",
      "Preparing file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.bed... \n",
      "\n",
      "Reading SNP p-values from file ./data//model_pred/new_experiments/test_magma_format.csv... \n",
      "\tdetected 2 variables in file\n",
      "\tusing variable: SNP (SNP id)\n",
      "\tusing variable: P (p-value)\n",
      "\tread 542759 lines from file, containing valid SNP p-values for 542758 SNPs in data (100% of lines, 69.21% of SNPs in data)\n",
      "Loading gene annotation from file ./data/gene_annotation.genes.annot... \n",
      "\t17919 gene definitions read from file\n",
      "\tfound 16637 genes containing valid SNPs in genotype data\n",
      "\n",
      "\n",
      "Starting gene analysis... \n",
      "\tusing model: SNPwise-mean\n",
      "\tengaging automatic 'big-data' mode\n",
      "\tprocessed genes: 50 (0.3%)     \n",
      "\tprocessed genes: 68 (0.4%)     \n",
      "\tprocessed genes: 82 (0.5%)     \n",
      "\tprocessed genes: 110 (0.7%)     \n",
      "\tprocessed genes: 134 (0.8%)     \n",
      "\tprocessed genes: 147 (0.9%)     \n",
      "\tprocessed genes: 176 (1.1%)     \n",
      "\tprocessed genes: 192 (1.2%)     \n",
      "\tprocessed genes: 208 (1.3%)     \n",
      "\tprocessed genes: 227 (1.4%)     \n",
      "\tprocessed genes: 243 (1.5%)     \n",
      "\tprocessed genes: 273 (1.6%)     \n",
      "\tprocessed genes: 313 (1.9%)     \n",
      "\tprocessed genes: 351 (2.1%)     \n",
      "\tprocessed genes: 398 (2.4%)     \n",
      "\tprocessed genes: 435 (2.6%)     \n",
      "\tprocessed genes: 478 (2.9%)     \n",
      "\tprocessed genes: 507 (3%)     \n",
      "\tprocessed genes: 530 (3.2%)     \n",
      "\tprocessed genes: 569 (3.4%)     \n",
      "\tprocessed genes: 585 (3.5%)     \n",
      "\tprocessed genes: 622 (3.7%)     \n",
      "\tprocessed genes: 640 (3.8%)     \n",
      "\tprocessed genes: 652 (3.9%)     \n",
      "\tprocessed genes: 668 (4%)     \n",
      "\tprocessed genes: 686 (4.1%)     \n",
      "\tprocessed genes: 708 (4.3%)     \n",
      "\tprocessed genes: 743 (4.5%)     \n",
      "\tprocessed genes: 786 (4.7%)     \n",
      "\tprocessed genes: 802 (4.8%)     \n",
      "\tprocessed genes: 846 (5.1%)     \n",
      "\tprocessed genes: 888 (5.3%)     \n",
      "\tprocessed genes: 910 (5.5%)     \n",
      "\tprocessed genes: 950 (5.7%)     \n",
      "\tprocessed genes: 1066 (6.4%)     \n",
      "\tprocessed genes: 1099 (6.6%)     \n",
      "\tprocessed genes: 1142 (6.9%)     \n",
      "\tprocessed genes: 1167 (7%)     \n",
      "\tprocessed genes: 1206 (7.2%)     \n",
      "\tprocessed genes: 1238 (7.4%)     \n",
      "\tprocessed genes: 1261 (7.6%)     \n",
      "\tprocessed genes: 1275 (7.7%)     \n",
      "\tprocessed genes: 1302 (7.8%)     \n",
      "\tprocessed genes: 1332 (8%)     \n",
      "\tprocessed genes: 1346 (8.1%)     \n",
      "\tprocessed genes: 1380 (8.3%)     \n",
      "\tprocessed genes: 1402 (8.4%)     \n",
      "\tprocessed genes: 1444 (8.7%)     \n",
      "\tprocessed genes: 1484 (8.9%)     \n",
      "\tprocessed genes: 1505 (9%)     \n",
      "\tprocessed genes: 1532 (9.2%)     \n",
      "\tprocessed genes: 1556 (9.4%)     \n",
      "\tprocessed genes: 1572 (9.4%)     \n",
      "\tprocessed genes: 1609 (9.7%)     \n",
      "\tprocessed genes: 1648 (9.9%)     \n",
      "\tprocessed genes: 1668 (10%)     \n",
      "\tprocessed genes: 1677 (10.1%)     \n",
      "\tprocessed genes: 1694 (10.2%)     \n",
      "\tprocessed genes: 1699 (10.2%)     \n",
      "\tprocessed genes: 1718 (10.3%)     \n",
      "\tprocessed genes: 1722 (10.4%)     \n",
      "\tprocessed genes: 1761 (10.6%)     \n",
      "\tprocessed genes: 1783 (10.7%)     \n",
      "\tprocessed genes: 1821 (10.9%)     \n",
      "\tprocessed genes: 1862 (11.2%)     \n",
      "\tprocessed genes: 1906 (11.5%)     \n",
      "\tprocessed genes: 1916 (11.5%)     \n",
      "\tprocessed genes: 1927 (11.6%)     \n",
      "\tprocessed genes: 1942 (11.7%)     \n",
      "\tprocessed genes: 1973 (11.9%)     \n",
      "\tprocessed genes: 1986 (11.9%)     \n",
      "\tprocessed genes: 2005 (12.1%)     \n",
      "\tprocessed genes: 2019 (12.1%)     \n",
      "\tprocessed genes: 2062 (12.4%)     \n",
      "\tprocessed genes: 2103 (12.6%)     \n",
      "\tprocessed genes: 2138 (12.9%)     \n",
      "\tprocessed genes: 2147 (12.9%)     \n",
      "\tprocessed genes: 2228 (13.4%)     \n",
      "\tprocessed genes: 2254 (13.5%)     \n",
      "\tprocessed genes: 2285 (13.7%)     \n",
      "\tprocessed genes: 2313 (13.9%)     \n",
      "\tprocessed genes: 2346 (14.1%)     \n",
      "\tprocessed genes: 2374 (14.3%)     \n",
      "\tprocessed genes: 2391 (14.4%)     \n",
      "\tprocessed genes: 2412 (14.5%)     \n",
      "\tprocessed genes: 2431 (14.6%)     \n",
      "\tprocessed genes: 2450 (14.7%)     \n",
      "\tprocessed genes: 2462 (14.8%)     \n",
      "\tprocessed genes: 2480 (14.9%)     \n",
      "\tprocessed genes: 2502 (15%)     \n",
      "\tprocessed genes: 2530 (15.2%)     \n",
      "\tprocessed genes: 2551 (15.3%)     \n",
      "\tprocessed genes: 2585 (15.5%)     \n",
      "\tprocessed genes: 2629 (15.8%)     \n",
      "\tprocessed genes: 2644 (15.9%)     \n",
      "\tprocessed genes: 2668 (16%)     \n",
      "\tprocessed genes: 2686 (16.1%)     \n",
      "\tprocessed genes: 2737 (16.5%)     \n",
      "\tprocessed genes: 2760 (16.6%)     \n",
      "\tprocessed genes: 2776 (16.7%)     \n",
      "\tprocessed genes: 2806 (16.9%)     \n",
      "\tprocessed genes: 2818 (16.9%)     \n",
      "\tprocessed genes: 2824 (17%)     \n",
      "\tprocessed genes: 2842 (17.1%)     \n",
      "\tprocessed genes: 2875 (17.3%)     \n",
      "\tprocessed genes: 2881 (17.3%)     \n",
      "\tprocessed genes: 2892 (17.4%)     \n",
      "\tprocessed genes: 2928 (17.6%)     \n",
      "\tprocessed genes: 2945 (17.7%)     \n",
      "\tprocessed genes: 2957 (17.8%)     \n",
      "\tprocessed genes: 2975 (17.9%)     \n",
      "\tprocessed genes: 2984 (17.9%)     \n",
      "\tprocessed genes: 3002 (18%)     \n",
      "\tprocessed genes: 3025 (18.2%)     \n",
      "\tprocessed genes: 3058 (18.4%)     \n",
      "\tprocessed genes: 3076 (18.5%)     \n",
      "\tprocessed genes: 3115 (18.7%)     \n",
      "\tprocessed genes: 3167 (19%)     \n",
      "\tprocessed genes: 3207 (19.3%)     \n",
      "\tprocessed genes: 3249 (19.5%)     \n",
      "\tprocessed genes: 3254 (19.6%)     \n",
      "\tprocessed genes: 3266 (19.6%)     \n",
      "\tprocessed genes: 3281 (19.7%)     \n",
      "\tprocessed genes: 3285 (19.7%)     \n",
      "\tprocessed genes: 3296 (19.8%)     \n",
      "\tprocessed genes: 3314 (19.9%)     \n",
      "\tprocessed genes: 3317 (19.9%)     \n",
      "\tprocessed genes: 3366 (20.2%)     \n",
      "\tprocessed genes: 3406 (20.5%)     \n",
      "\tprocessed genes: 3424 (20.6%)     \n",
      "\tprocessed genes: 3462 (20.8%)     \n",
      "\tprocessed genes: 3479 (20.9%)     \n",
      "\tprocessed genes: 3495 (21%)     \n",
      "\tprocessed genes: 3530 (21.2%)     \n",
      "\tprocessed genes: 3543 (21.3%)     \n",
      "\tprocessed genes: 3562 (21.4%)     \n",
      "\tprocessed genes: 3593 (21.6%)     \n",
      "\tprocessed genes: 3610 (21.7%)     \n",
      "\tprocessed genes: 3654 (22%)     \n",
      "\tprocessed genes: 3675 (22.1%)     \n",
      "\tprocessed genes: 3697 (22.2%)     \n",
      "\tprocessed genes: 3722 (22.4%)     \n",
      "\tprocessed genes: 3724 (22.4%)     \n",
      "\tprocessed genes: 3775 (22.7%)     \n",
      "\tprocessed genes: 3795 (22.8%)     \n",
      "\tprocessed genes: 3802 (22.9%)     \n",
      "\tprocessed genes: 3810 (22.9%)     \n",
      "\tprocessed genes: 3827 (23%)     \n",
      "\tprocessed genes: 3857 (23.2%)     \n",
      "\tprocessed genes: 3906 (23.5%)     \n",
      "\tprocessed genes: 3921 (23.6%)     \n",
      "\tprocessed genes: 3927 (23.6%)     \n",
      "\tprocessed genes: 3935 (23.7%)     \n",
      "\tprocessed genes: 3945 (23.7%)     \n",
      "\tprocessed genes: 3967 (23.8%)     \n",
      "\tprocessed genes: 3978 (23.9%)     \n",
      "\tprocessed genes: 3996 (24%)     \n",
      "\tprocessed genes: 4024 (24.2%)     \n",
      "\tprocessed genes: 4035 (24.3%)     \n",
      "\tprocessed genes: 4070 (24.5%)     \n",
      "\tprocessed genes: 4097 (24.6%)     \n",
      "\tprocessed genes: 4152 (25%)     \n",
      "\tprocessed genes: 4179 (25.1%)     \n",
      "\tprocessed genes: 4208 (25.3%)     \n",
      "\tprocessed genes: 4233 (25.4%)     \n",
      "\tprocessed genes: 4240 (25.5%)     \n",
      "\tprocessed genes: 4270 (25.7%)     \n",
      "\tprocessed genes: 4293 (25.8%)     \n",
      "\tprocessed genes: 4313 (25.9%)     \n",
      "\tprocessed genes: 4371 (26.3%)     \n",
      "\tprocessed genes: 4402 (26.5%)     \n",
      "\tprocessed genes: 4418 (26.6%)     \n",
      "\tprocessed genes: 4447 (26.7%)     \n",
      "\tprocessed genes: 4466 (26.8%)     \n",
      "\tprocessed genes: 4483 (26.9%)     \n",
      "\tprocessed genes: 4503 (27.1%)     \n",
      "\tprocessed genes: 4534 (27.3%)     \n",
      "\tprocessed genes: 4570 (27.5%)     \n",
      "\tprocessed genes: 4584 (27.6%)     \n",
      "\tprocessed genes: 4592 (27.6%)     \n",
      "\tprocessed genes: 4611 (27.7%)     \n",
      "\tprocessed genes: 4637 (27.9%)     \n",
      "\tprocessed genes: 4676 (28.1%)     \n",
      "\tprocessed genes: 4705 (28.3%)     \n",
      "\tprocessed genes: 4748 (28.5%)     \n",
      "\tprocessed genes: 4769 (28.7%)     \n",
      "\tprocessed genes: 4785 (28.8%)     \n",
      "\tprocessed genes: 4798 (28.8%)     \n",
      "\tprocessed genes: 4820 (29%)     \n",
      "\tprocessed genes: 4857 (29.2%)     \n",
      "\tprocessed genes: 4881 (29.3%)     \n",
      "\tprocessed genes: 4913 (29.5%)     \n",
      "\tprocessed genes: 4958 (29.8%)     \n",
      "\tprocessed genes: 4988 (30%)     \n",
      "\tprocessed genes: 5019 (30.2%)     \n",
      "\tprocessed genes: 5033 (30.3%)     \n",
      "\tprocessed genes: 5045 (30.3%)     \n",
      "\tprocessed genes: 5049 (30.3%)     \n",
      "\tprocessed genes: 5053 (30.4%)     \n",
      "\tprocessed genes: 5059 (30.4%)     \n",
      "\tprocessed genes: 5074 (30.5%)     \n",
      "\tprocessed genes: 5078 (30.5%)     \n",
      "\tprocessed genes: 5091 (30.6%)     \n",
      "\tprocessed genes: 5113 (30.7%)     \n",
      "\tprocessed genes: 5137 (30.9%)     \n",
      "\tprocessed genes: 5149 (30.9%)     \n",
      "\tprocessed genes: 5169 (31.1%)     \n",
      "\tprocessed genes: 5197 (31.2%)     \n",
      "\tprocessed genes: 5208 (31.3%)     \n",
      "\tprocessed genes: 5234 (31.5%)     \n",
      "\tprocessed genes: 5277 (31.7%)     \n",
      "\tprocessed genes: 5298 (31.8%)     \n",
      "\tprocessed genes: 5324 (32%)     \n",
      "\tprocessed genes: 5342 (32.1%)     \n",
      "\tprocessed genes: 5357 (32.2%)     \n",
      "\tprocessed genes: 5375 (32.3%)     \n",
      "\tprocessed genes: 5395 (32.4%)     \n",
      "\tprocessed genes: 5420 (32.6%)     \n",
      "\tprocessed genes: 5468 (32.9%)     \n",
      "\tprocessed genes: 5493 (33%)     \n",
      "\tprocessed genes: 5511 (33.1%)     \n",
      "\tprocessed genes: 5522 (33.2%)     \n",
      "\tprocessed genes: 5533 (33.3%)     \n",
      "\tprocessed genes: 5541 (33.3%)     \n",
      "\tprocessed genes: 5546 (33.3%)     \n",
      "\tprocessed genes: 5553 (33.4%)     \n",
      "\tprocessed genes: 5563 (33.4%)     \n",
      "\tprocessed genes: 5578 (33.5%)     \n",
      "\tprocessed genes: 5590 (33.6%)     \n",
      "\tprocessed genes: 5598 (33.6%)     \n",
      "\tprocessed genes: 5609 (33.7%)     \n",
      "\tprocessed genes: 5611 (33.7%)     \n",
      "\tprocessed genes: 5618 (33.8%)     \n",
      "\tprocessed genes: 5623 (33.8%)     \n",
      "\tprocessed genes: 5629 (33.8%)     \n",
      "\tprocessed genes: 5632 (33.9%)     \n",
      "\tprocessed genes: 5645 (33.9%)     \n",
      "\tprocessed genes: 5655 (34%)     \n",
      "\tprocessed genes: 5664 (34%)     \n",
      "\tprocessed genes: 5672 (34.1%)     \n",
      "\tprocessed genes: 5686 (34.2%)     \n",
      "\tprocessed genes: 5698 (34.2%)     \n",
      "\tprocessed genes: 5709 (34.3%)     \n",
      "\tprocessed genes: 5720 (34.4%)     \n",
      "\tprocessed genes: 5749 (34.6%)     \n",
      "\tprocessed genes: 5792 (34.8%)     \n",
      "\tprocessed genes: 5805 (34.9%)     \n",
      "\tprocessed genes: 5843 (35.1%)     \n",
      "\tprocessed genes: 5864 (35.2%)     \n",
      "\tprocessed genes: 5885 (35.4%)     \n",
      "\tprocessed genes: 5932 (35.7%)     \n",
      "\tprocessed genes: 5969 (35.9%)     \n",
      "\tprocessed genes: 6004 (36.1%)     \n",
      "\tprocessed genes: 6043 (36.3%)     \n",
      "\tprocessed genes: 6061 (36.4%)     \n",
      "\tprocessed genes: 6081 (36.6%)     \n",
      "\tprocessed genes: 6111 (36.7%)     \n",
      "\tprocessed genes: 6138 (36.9%)     \n",
      "\tprocessed genes: 6155 (37%)     \n",
      "\tprocessed genes: 6174 (37.1%)     \n",
      "\tprocessed genes: 6183 (37.2%)     \n",
      "\tprocessed genes: 6194 (37.2%)     \n",
      "\tprocessed genes: 6203 (37.3%)     \n",
      "\tprocessed genes: 6227 (37.4%)     \n",
      "\tprocessed genes: 6230 (37.4%)     \n",
      "\tprocessed genes: 6240 (37.5%)     \n",
      "\tprocessed genes: 6281 (37.8%)     \n",
      "\tprocessed genes: 6293 (37.8%)     \n",
      "\tprocessed genes: 6317 (38%)     \n",
      "\tprocessed genes: 6332 (38.1%)     \n",
      "\tprocessed genes: 6342 (38.1%)     \n",
      "\tprocessed genes: 6356 (38.2%)     \n",
      "\tprocessed genes: 6377 (38.3%)     \n",
      "\tprocessed genes: 6405 (38.5%)     \n",
      "\tprocessed genes: 6426 (38.6%)     \n",
      "\tprocessed genes: 6438 (38.7%)     \n",
      "\tprocessed genes: 6452 (38.8%)     \n",
      "\tprocessed genes: 6464 (38.9%)     \n",
      "\tprocessed genes: 6499 (39.1%)     \n",
      "\tprocessed genes: 6518 (39.2%)     \n",
      "\tprocessed genes: 6564 (39.5%)     \n",
      "\tprocessed genes: 6605 (39.7%)     \n",
      "\tprocessed genes: 6606 (39.7%)     \n",
      "\tprocessed genes: 6616 (39.8%)     \n",
      "\tprocessed genes: 6649 (40%)     \n",
      "\tprocessed genes: 6700 (40.3%)     \n",
      "\tprocessed genes: 6751 (40.6%)     \n",
      "\tprocessed genes: 6770 (40.7%)     \n",
      "\tprocessed genes: 6782 (40.8%)     \n",
      "\tprocessed genes: 6799 (40.9%)     \n",
      "\tprocessed genes: 6832 (41.1%)     \n",
      "\tprocessed genes: 6889 (41.4%)     \n",
      "\tprocessed genes: 6900 (41.5%)     \n",
      "\tprocessed genes: 6921 (41.6%)     \n",
      "\tprocessed genes: 6941 (41.7%)     \n",
      "\tprocessed genes: 6955 (41.8%)     \n",
      "\tprocessed genes: 6984 (42%)     \n",
      "\tprocessed genes: 7037 (42.3%)     \n",
      "\tprocessed genes: 7056 (42.4%)     \n",
      "\tprocessed genes: 7065 (42.5%)     \n",
      "\tprocessed genes: 7071 (42.5%)     \n",
      "\tprocessed genes: 7088 (42.6%)     \n",
      "\tprocessed genes: 7106 (42.7%)     \n",
      "\tprocessed genes: 7108 (42.7%)     \n",
      "\tprocessed genes: 7124 (42.8%)     \n",
      "\tprocessed genes: 7143 (42.9%)     \n",
      "\tprocessed genes: 7163 (43.1%)     \n",
      "\tprocessed genes: 7182 (43.2%)     \n",
      "\tprocessed genes: 7206 (43.3%)     \n",
      "\tprocessed genes: 7223 (43.4%)     \n",
      "\tprocessed genes: 7258 (43.6%)     \n",
      "\tprocessed genes: 7285 (43.8%)     \n",
      "\tprocessed genes: 7318 (44%)     \n",
      "\tprocessed genes: 7344 (44.1%)     \n",
      "\tprocessed genes: 7356 (44.2%)     \n",
      "\tprocessed genes: 7408 (44.5%)     \n",
      "\tprocessed genes: 7459 (44.8%)     \n",
      "\tprocessed genes: 7484 (45%)     \n",
      "\tprocessed genes: 7502 (45.1%)     \n",
      "\tprocessed genes: 7524 (45.2%)     \n",
      "\tprocessed genes: 7553 (45.4%)     \n",
      "\tprocessed genes: 7567 (45.5%)     \n",
      "\tprocessed genes: 7575 (45.5%)     \n",
      "\tprocessed genes: 7617 (45.8%)     \n",
      "\tprocessed genes: 7657 (46%)     \n",
      "\tprocessed genes: 7667 (46.1%)     \n",
      "\tprocessed genes: 7682 (46.2%)     \n",
      "\tprocessed genes: 7684 (46.2%)     \n",
      "\tprocessed genes: 7697 (46.3%)     \n",
      "\tprocessed genes: 7708 (46.3%)     \n",
      "\tprocessed genes: 7735 (46.5%)     \n",
      "\tprocessed genes: 7803 (46.9%)     \n",
      "\tprocessed genes: 7835 (47.1%)     \n",
      "\tprocessed genes: 7853 (47.2%)     \n",
      "\tprocessed genes: 7876 (47.3%)     \n",
      "\tprocessed genes: 7911 (47.6%)     \n",
      "\tprocessed genes: 7942 (47.7%)     \n",
      "\tprocessed genes: 7967 (47.9%)     \n",
      "\tprocessed genes: 8006 (48.1%)     \n",
      "\tprocessed genes: 8022 (48.2%)     \n",
      "\tprocessed genes: 8046 (48.4%)     \n",
      "\tprocessed genes: 8059 (48.4%)     \n",
      "\tprocessed genes: 8061 (48.5%)     \n",
      "\tprocessed genes: 8099 (48.7%)     \n",
      "\tprocessed genes: 8133 (48.9%)     \n",
      "\tprocessed genes: 8184 (49.2%)     \n",
      "\tprocessed genes: 8196 (49.3%)     \n",
      "\tprocessed genes: 8207 (49.3%)     \n",
      "\tprocessed genes: 8229 (49.5%)     \n",
      "\tprocessed genes: 8234 (49.5%)     \n",
      "\tprocessed genes: 8248 (49.6%)     \n",
      "\tprocessed genes: 8269 (49.7%)     \n",
      "\tprocessed genes: 8306 (49.9%)     \n",
      "\tprocessed genes: 8316 (50%)     \n",
      "\tprocessed genes: 8341 (50.1%)     \n",
      "\tprocessed genes: 8354 (50.2%)     \n",
      "\tprocessed genes: 8364 (50.3%)     \n",
      "\tprocessed genes: 8373 (50.3%)     \n",
      "\tprocessed genes: 8390 (50.4%)     \n",
      "\tprocessed genes: 8401 (50.5%)     \n",
      "\tprocessed genes: 8410 (50.5%)     \n",
      "\tprocessed genes: 8426 (50.6%)     \n",
      "\tprocessed genes: 8450 (50.8%)     \n",
      "\tprocessed genes: 8497 (51.1%)     \n",
      "\tprocessed genes: 8517 (51.2%)     \n",
      "\tprocessed genes: 8539 (51.3%)     \n",
      "\tprocessed genes: 8555 (51.4%)     \n",
      "\tprocessed genes: 8580 (51.6%)     \n",
      "\tprocessed genes: 8591 (51.6%)     \n",
      "\tprocessed genes: 8623 (51.8%)     \n",
      "\tprocessed genes: 8626 (51.8%)     \n",
      "\tprocessed genes: 8645 (52%)     \n",
      "\tprocessed genes: 8663 (52.1%)     \n",
      "\tprocessed genes: 8703 (52.3%)     \n",
      "\tprocessed genes: 8732 (52.5%)     \n",
      "\tprocessed genes: 8760 (52.7%)     \n",
      "\tprocessed genes: 8792 (52.8%)     \n",
      "\tprocessed genes: 8828 (53.1%)     \n",
      "\tprocessed genes: 8847 (53.2%)     \n",
      "\tprocessed genes: 8866 (53.3%)     \n",
      "\tprocessed genes: 8889 (53.4%)     \n",
      "\tprocessed genes: 8919 (53.6%)     \n",
      "\tprocessed genes: 8936 (53.7%)     \n",
      "\tprocessed genes: 8944 (53.8%)     \n",
      "\tprocessed genes: 8958 (53.8%)     \n",
      "\tprocessed genes: 9022 (54.2%)     \n",
      "\tprocessed genes: 9043 (54.4%)     \n",
      "\tprocessed genes: 9070 (54.5%)     \n",
      "\tprocessed genes: 9100 (54.7%)     \n",
      "\tprocessed genes: 9128 (54.9%)     \n",
      "\tprocessed genes: 9147 (55%)     \n",
      "\tprocessed genes: 9159 (55.1%)     \n",
      "\tprocessed genes: 9172 (55.1%)     \n",
      "\tprocessed genes: 9179 (55.2%)     \n",
      "\tprocessed genes: 9186 (55.2%)     \n",
      "\tprocessed genes: 9195 (55.3%)     \n",
      "\tprocessed genes: 9215 (55.4%)     \n",
      "\tprocessed genes: 9241 (55.5%)     \n",
      "\tprocessed genes: 9246 (55.6%)     \n",
      "\tprocessed genes: 9277 (55.8%)     \n",
      "\tprocessed genes: 9300 (55.9%)     \n",
      "\tprocessed genes: 9312 (56%)     \n",
      "\tprocessed genes: 9363 (56.3%)     \n",
      "\tprocessed genes: 9466 (56.9%)     \n",
      "\tprocessed genes: 9503 (57.1%)     \n",
      "\tprocessed genes: 9543 (57.4%)     \n",
      "\tprocessed genes: 9575 (57.6%)     \n",
      "\tprocessed genes: 9625 (57.9%)     \n",
      "\tprocessed genes: 9674 (58.1%)     \n",
      "\tprocessed genes: 9697 (58.3%)     \n",
      "\tprocessed genes: 9719 (58.4%)     \n",
      "\tprocessed genes: 9750 (58.6%)     \n",
      "\tprocessed genes: 9774 (58.7%)     \n",
      "\tprocessed genes: 9784 (58.8%)     \n",
      "\tprocessed genes: 9792 (58.9%)     \n",
      "\tprocessed genes: 9831 (59.1%)     \n",
      "\tprocessed genes: 9845 (59.2%)     \n",
      "\tprocessed genes: 9882 (59.4%)     \n",
      "\tprocessed genes: 9923 (59.6%)     \n",
      "\tprocessed genes: 9950 (59.8%)     \n",
      "\tprocessed genes: 9981 (60%)     \n",
      "\tprocessed genes: 10006 (60.1%)     \n",
      "\tprocessed genes: 10019 (60.2%)     \n",
      "\tprocessed genes: 10058 (60.5%)     \n",
      "\tprocessed genes: 10076 (60.6%)     \n",
      "\tprocessed genes: 10093 (60.7%)     \n",
      "\tprocessed genes: 10095 (60.7%)     \n",
      "\tprocessed genes: 10096 (60.7%)     \n",
      "\tprocessed genes: 10120 (60.8%)     \n",
      "\tprocessed genes: 10130 (60.9%)     \n",
      "\tprocessed genes: 10139 (60.9%)     \n",
      "\tprocessed genes: 10150 (61%)     \n",
      "\tprocessed genes: 10185 (61.2%)     \n",
      "\tprocessed genes: 10211 (61.4%)     \n",
      "\tprocessed genes: 10242 (61.6%)     \n",
      "\tprocessed genes: 10266 (61.7%)     \n",
      "\tprocessed genes: 10285 (61.8%)     \n",
      "\tprocessed genes: 10304 (61.9%)     \n",
      "\tprocessed genes: 10320 (62%)     \n",
      "\tprocessed genes: 10348 (62.2%)     \n",
      "\tprocessed genes: 10367 (62.3%)     \n",
      "\tprocessed genes: 10390 (62.5%)     \n",
      "\tprocessed genes: 10443 (62.8%)     \n",
      "\tprocessed genes: 10477 (63%)     \n",
      "\tprocessed genes: 10507 (63.2%)     \n",
      "\tprocessed genes: 10544 (63.4%)     \n",
      "\tprocessed genes: 10593 (63.7%)     \n",
      "\tprocessed genes: 10622 (63.8%)     \n",
      "\tprocessed genes: 10649 (64%)     \n",
      "\tprocessed genes: 10686 (64.2%)     \n",
      "\tprocessed genes: 10712 (64.4%)     \n",
      "\tprocessed genes: 10737 (64.5%)     \n",
      "\tprocessed genes: 10771 (64.7%)     \n",
      "\tprocessed genes: 10788 (64.8%)     \n",
      "\tprocessed genes: 10810 (65%)     \n",
      "\tprocessed genes: 10829 (65.1%)     \n",
      "\tprocessed genes: 10859 (65.3%)     \n",
      "\tprocessed genes: 10893 (65.5%)     \n",
      "\tprocessed genes: 10917 (65.6%)     \n",
      "\tprocessed genes: 10951 (65.8%)     \n",
      "\tprocessed genes: 10983 (66%)     \n",
      "\tprocessed genes: 11001 (66.1%)     \n",
      "\tprocessed genes: 11007 (66.2%)     \n",
      "\tprocessed genes: 11011 (66.2%)     \n",
      "\tprocessed genes: 11037 (66.3%)     \n",
      "\tprocessed genes: 11069 (66.5%)     \n",
      "\tprocessed genes: 11092 (66.7%)     \n",
      "\tprocessed genes: 11103 (66.7%)     \n",
      "\tprocessed genes: 11118 (66.8%)     \n",
      "\tprocessed genes: 11142 (67%)     \n",
      "\tprocessed genes: 11170 (67.1%)     \n",
      "\tprocessed genes: 11205 (67.3%)     \n",
      "\tprocessed genes: 11231 (67.5%)     \n",
      "\tprocessed genes: 11252 (67.6%)     \n",
      "\tprocessed genes: 11261 (67.7%)     \n",
      "\tprocessed genes: 11277 (67.8%)     \n",
      "\tprocessed genes: 11285 (67.8%)     \n",
      "\tprocessed genes: 11297 (67.9%)     \n",
      "\tprocessed genes: 11310 (68%)     \n",
      "\tprocessed genes: 11331 (68.1%)     \n",
      "\tprocessed genes: 11387 (68.4%)     \n",
      "\tprocessed genes: 11423 (68.7%)     \n",
      "\tprocessed genes: 11463 (68.9%)     \n",
      "\tprocessed genes: 11478 (69%)     \n",
      "\tprocessed genes: 11518 (69.2%)     \n",
      "\tprocessed genes: 11559 (69.5%)     \n",
      "\tprocessed genes: 11597 (69.7%)     \n",
      "\tprocessed genes: 11618 (69.8%)     \n",
      "\tprocessed genes: 11640 (70%)     \n",
      "\tprocessed genes: 11664 (70.1%)     \n",
      "\tprocessed genes: 11698 (70.3%)     \n",
      "\tprocessed genes: 11724 (70.5%)     \n",
      "\tprocessed genes: 11736 (70.5%)     \n",
      "\tprocessed genes: 11757 (70.7%)     \n",
      "\tprocessed genes: 11774 (70.8%)     \n",
      "\tprocessed genes: 11797 (70.9%)     \n",
      "\tprocessed genes: 11815 (71%)     \n",
      "\tprocessed genes: 11853 (71.2%)     \n",
      "\tprocessed genes: 11892 (71.5%)     \n",
      "\tprocessed genes: 11900 (71.5%)     \n",
      "\tprocessed genes: 11912 (71.6%)     \n",
      "\tprocessed genes: 11949 (71.8%)     \n",
      "\tprocessed genes: 11996 (72.1%)     \n",
      "\tprocessed genes: 12036 (72.3%)     \n",
      "\tprocessed genes: 12064 (72.5%)     \n",
      "\tprocessed genes: 12086 (72.6%)     \n",
      "\tprocessed genes: 12100 (72.7%)     \n",
      "\tprocessed genes: 12114 (72.8%)     \n",
      "\tprocessed genes: 12139 (73%)     \n",
      "\tprocessed genes: 12166 (73.1%)     \n",
      "\tprocessed genes: 12179 (73.2%)     \n",
      "\tprocessed genes: 12188 (73.3%)     \n",
      "\tprocessed genes: 12236 (73.5%)     \n",
      "\tprocessed genes: 12264 (73.7%)     \n",
      "\tprocessed genes: 12283 (73.8%)     \n",
      "\tprocessed genes: 12305 (74%)     \n",
      "\tprocessed genes: 12317 (74%)     \n",
      "\tprocessed genes: 12341 (74.2%)     \n",
      "\tprocessed genes: 12360 (74.3%)     \n",
      "\tprocessed genes: 12373 (74.4%)     \n",
      "\tprocessed genes: 12387 (74.5%)     \n",
      "\tprocessed genes: 12451 (74.8%)     \n",
      "\tprocessed genes: 12504 (75.2%)     \n",
      "\tprocessed genes: 12522 (75.3%)     \n",
      "\tprocessed genes: 12540 (75.4%)     \n",
      "\tprocessed genes: 12550 (75.4%)     \n",
      "\tprocessed genes: 12567 (75.5%)     \n",
      "\tprocessed genes: 12582 (75.6%)     \n",
      "\tprocessed genes: 12598 (75.7%)     \n",
      "\tprocessed genes: 12620 (75.9%)     \n",
      "\tprocessed genes: 12647 (76%)     \n",
      "\tprocessed genes: 12666 (76.1%)     \n",
      "\tprocessed genes: 12676 (76.2%)     \n",
      "\tprocessed genes: 12746 (76.6%)     \n",
      "\tprocessed genes: 12790 (76.9%)     \n",
      "\tprocessed genes: 12830 (77.1%)     \n",
      "\tprocessed genes: 12848 (77.2%)     \n",
      "\tprocessed genes: 12927 (77.7%)     \n",
      "\tprocessed genes: 12958 (77.9%)     \n",
      "\tprocessed genes: 12972 (78%)     \n",
      "\tprocessed genes: 12987 (78.1%)     \n",
      "\tprocessed genes: 12988 (78.1%)     \n",
      "\tprocessed genes: 13000 (78.1%)     \n",
      "\tprocessed genes: 13005 (78.2%)     \n",
      "\tprocessed genes: 13019 (78.3%)     \n",
      "\tprocessed genes: 13026 (78.3%)     \n",
      "\tprocessed genes: 13045 (78.4%)     \n",
      "\tprocessed genes: 13065 (78.5%)     \n",
      "\tprocessed genes: 13095 (78.7%)     \n",
      "\tprocessed genes: 13122 (78.9%)     \n",
      "\tprocessed genes: 13141 (79%)     \n",
      "\tprocessed genes: 13154 (79.1%)     \n",
      "\tprocessed genes: 13178 (79.2%)     \n",
      "\tprocessed genes: 13196 (79.3%)     \n",
      "\tprocessed genes: 13218 (79.4%)     \n",
      "\tprocessed genes: 13250 (79.6%)     \n",
      "\tprocessed genes: 13274 (79.8%)     \n",
      "\tprocessed genes: 13281 (79.8%)     \n",
      "\tprocessed genes: 13288 (79.9%)     \n",
      "\tprocessed genes: 13298 (79.9%)     \n",
      "\tprocessed genes: 13305 (80%)     \n",
      "\tprocessed genes: 13340 (80.2%)     \n",
      "\tprocessed genes: 13380 (80.4%)     \n",
      "\tprocessed genes: 13450 (80.8%)     \n",
      "\tprocessed genes: 13453 (80.9%)     \n",
      "\tprocessed genes: 13499 (81.1%)     \n",
      "\tprocessed genes: 13530 (81.3%)     \n",
      "\tprocessed genes: 13579 (81.6%)     \n",
      "\tprocessed genes: 13620 (81.9%)     \n",
      "\tprocessed genes: 13659 (82.1%)     \n",
      "\tprocessed genes: 13684 (82.3%)     \n",
      "\tprocessed genes: 13717 (82.4%)     \n",
      "\tprocessed genes: 13758 (82.7%)     \n",
      "\tprocessed genes: 13779 (82.8%)     \n",
      "\tprocessed genes: 13818 (83.1%)     \n",
      "\tprocessed genes: 13840 (83.2%)     \n",
      "\tprocessed genes: 13874 (83.4%)     \n",
      "\tprocessed genes: 13885 (83.5%)     \n",
      "\tprocessed genes: 13903 (83.6%)     \n",
      "\tprocessed genes: 13957 (83.9%)     \n",
      "\tprocessed genes: 13989 (84.1%)     \n",
      "\tprocessed genes: 14001 (84.2%)     \n",
      "\tprocessed genes: 14011 (84.2%)     \n",
      "\tprocessed genes: 14022 (84.3%)     \n",
      "\tprocessed genes: 14035 (84.4%)     \n",
      "\tprocessed genes: 14068 (84.6%)     \n",
      "\tprocessed genes: 14080 (84.6%)     \n",
      "\tprocessed genes: 14097 (84.7%)     \n",
      "\tprocessed genes: 14105 (84.8%)     \n",
      "\tprocessed genes: 14119 (84.9%)     \n",
      "\tprocessed genes: 14138 (85%)     \n",
      "\tprocessed genes: 14171 (85.2%)     \n",
      "\tprocessed genes: 14204 (85.4%)     \n",
      "\tprocessed genes: 14222 (85.5%)     \n",
      "\tprocessed genes: 14235 (85.6%)     \n",
      "\tprocessed genes: 14254 (85.7%)     \n",
      "\tprocessed genes: 14273 (85.8%)     \n",
      "\tprocessed genes: 14297 (85.9%)     \n",
      "\tprocessed genes: 14323 (86.1%)     \n",
      "\tprocessed genes: 14380 (86.4%)     \n",
      "\tprocessed genes: 14418 (86.7%)     \n",
      "\tprocessed genes: 14439 (86.8%)     \n",
      "\tprocessed genes: 14461 (86.9%)     \n",
      "\tprocessed genes: 14482 (87%)     \n",
      "\tprocessed genes: 14492 (87.1%)     \n",
      "\tprocessed genes: 14514 (87.2%)     \n",
      "\tprocessed genes: 14524 (87.3%)     \n",
      "\tprocessed genes: 14541 (87.4%)     \n",
      "\tprocessed genes: 14559 (87.5%)     \n",
      "\tprocessed genes: 14571 (87.6%)     \n",
      "\tprocessed genes: 14598 (87.7%)     \n",
      "\tprocessed genes: 14622 (87.9%)     \n",
      "\tprocessed genes: 14652 (88.1%)     \n",
      "\tprocessed genes: 14686 (88.3%)     \n",
      "\tprocessed genes: 14714 (88.4%)     \n",
      "\tprocessed genes: 14735 (88.6%)     \n",
      "\tprocessed genes: 14759 (88.7%)     \n",
      "\tprocessed genes: 14778 (88.8%)     \n",
      "\tprocessed genes: 14800 (89%)     \n",
      "\tprocessed genes: 14826 (89.1%)     \n",
      "\tprocessed genes: 14855 (89.3%)     \n",
      "\tprocessed genes: 14902 (89.6%)     \n",
      "\tprocessed genes: 14960 (89.9%)     \n",
      "\tprocessed genes: 14991 (90.1%)     \n",
      "\tprocessed genes: 15020 (90.3%)     \n",
      "\tprocessed genes: 15056 (90.5%)     \n",
      "\tprocessed genes: 15083 (90.7%)     \n",
      "\tprocessed genes: 15127 (90.9%)     \n",
      "\tprocessed genes: 15155 (91.1%)     \n",
      "\tprocessed genes: 15182 (91.3%)     \n",
      "\tprocessed genes: 15211 (91.4%)     \n",
      "\tprocessed genes: 15235 (91.6%)     \n",
      "\tprocessed genes: 15255 (91.7%)     \n",
      "\tprocessed genes: 15279 (91.8%)     \n",
      "\tprocessed genes: 15308 (92%)     \n",
      "\tprocessed genes: 15325 (92.1%)     \n",
      "\tprocessed genes: 15346 (92.2%)     \n",
      "\tprocessed genes: 15371 (92.4%)     \n",
      "\tprocessed genes: 15390 (92.5%)     \n",
      "\tprocessed genes: 15405 (92.6%)     \n",
      "\tprocessed genes: 15421 (92.7%)     \n",
      "\tprocessed genes: 15432 (92.8%)     \n",
      "\tprocessed genes: 15460 (92.9%)     \n",
      "\tprocessed genes: 15476 (93%)     \n",
      "\tprocessed genes: 15501 (93.2%)     \n",
      "\tprocessed genes: 15508 (93.2%)     \n",
      "\tprocessed genes: 15520 (93.3%)     \n",
      "\tprocessed genes: 15537 (93.4%)     \n",
      "\tprocessed genes: 15564 (93.6%)     \n",
      "\tprocessed genes: 15603 (93.8%)     \n",
      "\tprocessed genes: 15639 (94%)     \n",
      "\tprocessed genes: 15660 (94.1%)     \n",
      "\tprocessed genes: 15672 (94.2%)     \n",
      "\tprocessed genes: 15683 (94.3%)     \n",
      "\tprocessed genes: 15688 (94.3%)     \n",
      "\tprocessed genes: 15711 (94.4%)     \n",
      "\tprocessed genes: 15735 (94.6%)     \n",
      "\tprocessed genes: 15808 (95%)     \n",
      "\tprocessed genes: 15842 (95.2%)     \n",
      "\tprocessed genes: 15862 (95.3%)     \n",
      "\tprocessed genes: 15921 (95.7%)     \n",
      "\tprocessed genes: 15932 (95.8%)     \n",
      "\tprocessed genes: 15951 (95.9%)     \n",
      "\tprocessed genes: 15962 (95.9%)     \n",
      "\tprocessed genes: 16000 (96.2%)     \n",
      "\tprocessed genes: 16004 (96.2%)     \n",
      "\tprocessed genes: 16032 (96.4%)     \n",
      "\tprocessed genes: 16065 (96.6%)     \n",
      "\tprocessed genes: 16098 (96.8%)     \n",
      "\tprocessed genes: 16129 (96.9%)     \n",
      "\tprocessed genes: 16146 (97%)     \n",
      "\tprocessed genes: 16156 (97.1%)     \n",
      "\tprocessed genes: 16167 (97.2%)     \n",
      "\tprocessed genes: 16179 (97.2%)     \n",
      "\tprocessed genes: 16198 (97.4%)     \n",
      "\tprocessed genes: 16221 (97.5%)     \n",
      "\tprocessed genes: 16234 (97.6%)     \n",
      "\tprocessed genes: 16264 (97.8%)     \n",
      "\tprocessed genes: 16307 (98%)     \n",
      "\tprocessed genes: 16343 (98.2%)     \n",
      "\tprocessed genes: 16353 (98.3%)     \n",
      "\tprocessed genes: 16380 (98.5%)     \n",
      "\tprocessed genes: 16417 (98.7%)     \n",
      "\tprocessed genes: 16430 (98.8%)     \n",
      "\tprocessed genes: 16462 (98.9%)     \n",
      "\tprocessed genes: 16505 (99.2%)     \n",
      "\tprocessed genes: 16529 (99.4%)     \n",
      "\tprocessed genes: 16561 (99.5%)     \n",
      "\tprocessed genes: 16573 (99.6%)     \n",
      "\tprocessed genes: 16580 (99.7%)     \n",
      "\tprocessed genes: 16595 (99.7%)     \n",
      "\tprocessed genes: 16600 (99.8%)     \n",
      "\tprocessed genes: 16632 (100%)     \n",
      "                                                                                                                   \n",
      "\twriting gene analysis results to file ./data//model_pred/new_experiments/test_magma_out.genes.out\n",
      "\twriting intermediate output to file ./data//model_pred/new_experiments/test_magma_out.genes.raw\n",
      "\n",
      "\n",
      "End time is 21:34:07, Monday 25 Nov 2024 (elapsed: 00:26:36)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('../')\n",
    "from kgwas import KGWAS, KGWAS_Data\n",
    "data = KGWAS_Data(data_path = './data/')\n",
    "data.load_kg()\n",
    "\n",
    "data.load_external_gwas(example_file = True)\n",
    "data.process_gwas_file()\n",
    "data.prepare_split()\n",
    "\n",
    "run = KGWAS(data, device = 'cuda:7', exp_name = 'test')\n",
    "run.load_pretrained('./data/model/test')\n",
    "run.run_magma(path_to_magma = \"/dfs/user/kexinh/ggwas/magma\", bfile = \"/dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here is an example output of the MAGMA run:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>GENE</th>\n",
       "      <th>CHR</th>\n",
       "      <th>START</th>\n",
       "      <th>STOP</th>\n",
       "      <th>NSNPS</th>\n",
       "      <th>NPARAM</th>\n",
       "      <th>N</th>\n",
       "      <th>ZSTAT</th>\n",
       "      <th>P</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>148398</td>\n",
       "      <td>1</td>\n",
       "      <td>859993</td>\n",
       "      <td>879961</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>9988</td>\n",
       "      <td>-1.10670</td>\n",
       "      <td>0.865800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>26155</td>\n",
       "      <td>1</td>\n",
       "      <td>879583</td>\n",
       "      <td>894679</td>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>9988</td>\n",
       "      <td>0.36490</td>\n",
       "      <td>0.357590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>339451</td>\n",
       "      <td>1</td>\n",
       "      <td>895967</td>\n",
       "      <td>901099</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9988</td>\n",
       "      <td>1.40540</td>\n",
       "      <td>0.079955</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>84069</td>\n",
       "      <td>1</td>\n",
       "      <td>901872</td>\n",
       "      <td>910488</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9988</td>\n",
       "      <td>1.56410</td>\n",
       "      <td>0.058896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>84808</td>\n",
       "      <td>1</td>\n",
       "      <td>910579</td>\n",
       "      <td>917473</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>9988</td>\n",
       "      <td>1.25460</td>\n",
       "      <td>0.104820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16632</th>\n",
       "      <td>23542</td>\n",
       "      <td>22</td>\n",
       "      <td>51039131</td>\n",
       "      <td>51049979</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9988</td>\n",
       "      <td>1.15830</td>\n",
       "      <td>0.123360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16633</th>\n",
       "      <td>410</td>\n",
       "      <td>22</td>\n",
       "      <td>51061182</td>\n",
       "      <td>51066601</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>9988</td>\n",
       "      <td>-0.65834</td>\n",
       "      <td>0.744840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16634</th>\n",
       "      <td>85358</td>\n",
       "      <td>22</td>\n",
       "      <td>51113070</td>\n",
       "      <td>51171640</td>\n",
       "      <td>41</td>\n",
       "      <td>15</td>\n",
       "      <td>9988</td>\n",
       "      <td>-1.18500</td>\n",
       "      <td>0.881990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16635</th>\n",
       "      <td>101928892</td>\n",
       "      <td>22</td>\n",
       "      <td>51123086</td>\n",
       "      <td>51125473</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>9988</td>\n",
       "      <td>-0.14590</td>\n",
       "      <td>0.558000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16636</th>\n",
       "      <td>49</td>\n",
       "      <td>22</td>\n",
       "      <td>51176652</td>\n",
       "      <td>51183727</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9988</td>\n",
       "      <td>-0.42425</td>\n",
       "      <td>0.664310</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>16637 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            GENE  CHR     START      STOP  NSNPS  NPARAM     N    ZSTAT  \\\n",
       "0         148398    1    859993    879961      6       4  9988 -1.10670   \n",
       "1          26155    1    879583    894679      9       4  9988  0.36490   \n",
       "2         339451    1    895967    901099      1       1  9988  1.40540   \n",
       "3          84069    1    901872    910488      3       2  9988  1.56410   \n",
       "4          84808    1    910579    917473      2       1  9988  1.25460   \n",
       "...          ...  ...       ...       ...    ...     ...   ...      ...   \n",
       "16632      23542   22  51039131  51049979      3       2  9988  1.15830   \n",
       "16633        410   22  51061182  51066601      9       5  9988 -0.65834   \n",
       "16634      85358   22  51113070  51171640     41      15  9988 -1.18500   \n",
       "16635  101928892   22  51123086  51125473      2       1  9988 -0.14590   \n",
       "16636         49   22  51176652  51183727      1       1  9988 -0.42425   \n",
       "\n",
       "              P  \n",
       "0      0.865800  \n",
       "1      0.357590  \n",
       "2      0.079955  \n",
       "3      0.058896  \n",
       "4      0.104820  \n",
       "...         ...  \n",
       "16632  0.123360  \n",
       "16633  0.744840  \n",
       "16634  0.881990  \n",
       "16635  0.558000  \n",
       "16636  0.664310  \n",
       "\n",
       "[16637 rows x 9 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "pd.read_csv('./data/model_pred/new_experiments/test_magma_out.genes.out', sep = '\\s+')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To know which gene this gene id maps to, you can use the following file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>79501</td>\n",
       "      <td>1</td>\n",
       "      <td>69091</td>\n",
       "      <td>70008</td>\n",
       "      <td>+</td>\n",
       "      <td>OR4F5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100996442</td>\n",
       "      <td>1</td>\n",
       "      <td>142447</td>\n",
       "      <td>174392</td>\n",
       "      <td>-</td>\n",
       "      <td>LOC100996442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>729759</td>\n",
       "      <td>1</td>\n",
       "      <td>367659</td>\n",
       "      <td>368597</td>\n",
       "      <td>+</td>\n",
       "      <td>OR4F29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>81399</td>\n",
       "      <td>1</td>\n",
       "      <td>621096</td>\n",
       "      <td>622034</td>\n",
       "      <td>-</td>\n",
       "      <td>OR4F16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>148398</td>\n",
       "      <td>1</td>\n",
       "      <td>859993</td>\n",
       "      <td>879961</td>\n",
       "      <td>+</td>\n",
       "      <td>SAMD11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19422</th>\n",
       "      <td>442867</td>\n",
       "      <td>Y</td>\n",
       "      <td>26764151</td>\n",
       "      <td>26785354</td>\n",
       "      <td>+</td>\n",
       "      <td>BPY2B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19423</th>\n",
       "      <td>57054</td>\n",
       "      <td>Y</td>\n",
       "      <td>26909216</td>\n",
       "      <td>26959639</td>\n",
       "      <td>-</td>\n",
       "      <td>DAZ3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19424</th>\n",
       "      <td>57135</td>\n",
       "      <td>Y</td>\n",
       "      <td>26979967</td>\n",
       "      <td>27053187</td>\n",
       "      <td>+</td>\n",
       "      <td>DAZ4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19425</th>\n",
       "      <td>442868</td>\n",
       "      <td>Y</td>\n",
       "      <td>27177048</td>\n",
       "      <td>27198251</td>\n",
       "      <td>-</td>\n",
       "      <td>BPY2C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19426</th>\n",
       "      <td>9085</td>\n",
       "      <td>Y</td>\n",
       "      <td>27768264</td>\n",
       "      <td>27771049</td>\n",
       "      <td>+</td>\n",
       "      <td>CDY1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>19427 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               0  1         2         3  4             5\n",
       "0          79501  1     69091     70008  +         OR4F5\n",
       "1      100996442  1    142447    174392  -  LOC100996442\n",
       "2         729759  1    367659    368597  +        OR4F29\n",
       "3          81399  1    621096    622034  -        OR4F16\n",
       "4         148398  1    859993    879961  +        SAMD11\n",
       "...          ... ..       ...       ... ..           ...\n",
       "19422     442867  Y  26764151  26785354  +         BPY2B\n",
       "19423      57054  Y  26909216  26959639  -          DAZ3\n",
       "19424      57135  Y  26979967  27053187  +          DAZ4\n",
       "19425     442868  Y  27177048  27198251  -         BPY2C\n",
       "19426       9085  Y  27768264  27771049  +          CDY1\n",
       "\n",
       "[19427 rows x 6 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./data/misc_data/NCBI37.3.gene.loc', sep = '\\t', header = None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "a100_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}