--- a +++ b/demo/run_magma.ipynb @@ -0,0 +1,1201 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run MAGMA on KGWAS Sumstats to get gene-based p-values\n", + "Given a local PLINK bfile (`.bed`/`.bim`/`.fam`) that stores your cohort's genotype data, you can run MAGMA on the KGWAS summary statistics to obtain gene-based p-values. The example below assumes that you have (1) saved a trained model under `./data/model/test`, (2) downloaded the MAGMA executable from [here](https://cncr.nl/research/magma/), and (3) a genotype file for your cohort." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All required data files are present.\n", + "--loading KG---\n", + "--using enformer SNP embedding--\n", + "--using random go embedding--\n", + "--using ESM gene embedding--\n", + "Loading example GWAS file...\n", + "Example file already exists locally.\n", + "Loading GWAS file from ./data/biochemistry_Creatinine_fastgwa_full_10000_1.fastGWA...\n", + "Using ldsc weight...\n", + "ldsc_weight mean: 0.9999999999999993\n", + "Annotation file already exists locally.\n", + "MAGMA command executed successfully.\n", + "Output: Welcome to MAGMA v1.10 (linux)\n", + "Using flags:\n", + "\t--bfile /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1\n", + "\t--gene-annot ./data/gene_annotation.genes.annot\n", + "\t--pval ./data//model_pred/new_experiments/test_magma_format.csv N=9988\n", + "\t--out ./data//model_pred/new_experiments/test_magma_out\n", + "\n", + "Start time is 21:07:31, Monday 25 Nov 2024\n", + "\n", + "Loading PLINK-format data...\n", + "Reading file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.fam... 48769 individuals read\n", + "Reading file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.bim... 784256 SNPs read\n", + "Preparing file /dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1.bed... 
\n", + "\n", + "Reading SNP p-values from file ./data//model_pred/new_experiments/test_magma_format.csv... \n", + "\tdetected 2 variables in file\n", + "\tusing variable: SNP (SNP id)\n", + "\tusing variable: P (p-value)\n", + "\tread 542759 lines from file, containing valid SNP p-values for 542758 SNPs in data (100% of lines, 69.21% of SNPs in data)\n", + "Loading gene annotation from file ./data/gene_annotation.genes.annot... \n", + "\t17919 gene definitions read from file\n", + "\tfound 16637 genes containing valid SNPs in genotype data\n", + "\n", + "\n", + "Starting gene analysis... \n", + "\tusing model: SNPwise-mean\n", + "\tengaging automatic 'big-data' mode\n", + "\tprocessed genes: 50 (0.3%) \n", + "\tprocessed genes: 68 (0.4%) \n", + "\tprocessed genes: 82 (0.5%) \n", + "\tprocessed genes: 110 (0.7%) \n", + "\tprocessed genes: 134 (0.8%) \n", + "\tprocessed genes: 147 (0.9%) \n", + "\tprocessed genes: 176 (1.1%) \n", + "\tprocessed genes: 192 (1.2%) \n", + "\tprocessed genes: 208 (1.3%) \n", + "\tprocessed genes: 227 (1.4%) \n", + "\tprocessed genes: 243 (1.5%) \n", + "\tprocessed genes: 273 (1.6%) \n", + "\tprocessed genes: 313 (1.9%) \n", + "\tprocessed genes: 351 (2.1%) \n", + "\tprocessed genes: 398 (2.4%) \n", + "\tprocessed genes: 435 (2.6%) \n", + "\tprocessed genes: 478 (2.9%) \n", + "\tprocessed genes: 507 (3%) \n", + "\tprocessed genes: 530 (3.2%) \n", + "\tprocessed genes: 569 (3.4%) \n", + "\tprocessed genes: 585 (3.5%) \n", + "\tprocessed genes: 622 (3.7%) \n", + "\tprocessed genes: 640 (3.8%) \n", + "\tprocessed genes: 652 (3.9%) \n", + "\tprocessed genes: 668 (4%) \n", + "\tprocessed genes: 686 (4.1%) \n", + "\tprocessed genes: 708 (4.3%) \n", + "\tprocessed genes: 743 (4.5%) \n", + "\tprocessed genes: 786 (4.7%) \n", + "\tprocessed genes: 802 (4.8%) \n", + "\tprocessed genes: 846 (5.1%) \n", + "\tprocessed genes: 888 (5.3%) \n", + "\tprocessed genes: 910 (5.5%) \n", + "\tprocessed genes: 950 (5.7%) \n", + "\tprocessed genes: 1066 (6.4%) 
\n", + "\tprocessed genes: 1099 (6.6%) \n", + "\tprocessed genes: 1142 (6.9%) \n", + "\tprocessed genes: 1167 (7%) \n", + "\tprocessed genes: 1206 (7.2%) \n", + "\tprocessed genes: 1238 (7.4%) \n", + "\tprocessed genes: 1261 (7.6%) \n", + "\tprocessed genes: 1275 (7.7%) \n", + "\tprocessed genes: 1302 (7.8%) \n", + "\tprocessed genes: 1332 (8%) \n", + "\tprocessed genes: 1346 (8.1%) \n", + "\tprocessed genes: 1380 (8.3%) \n", + "\tprocessed genes: 1402 (8.4%) \n", + "\tprocessed genes: 1444 (8.7%) \n", + "\tprocessed genes: 1484 (8.9%) \n", + "\tprocessed genes: 1505 (9%) \n", + "\tprocessed genes: 1532 (9.2%) \n", + "\tprocessed genes: 1556 (9.4%) \n", + "\tprocessed genes: 1572 (9.4%) \n", + "\tprocessed genes: 1609 (9.7%) \n", + "\tprocessed genes: 1648 (9.9%) \n", + "\tprocessed genes: 1668 (10%) \n", + "\tprocessed genes: 1677 (10.1%) \n", + "\tprocessed genes: 1694 (10.2%) \n", + "\tprocessed genes: 1699 (10.2%) \n", + "\tprocessed genes: 1718 (10.3%) \n", + "\tprocessed genes: 1722 (10.4%) \n", + "\tprocessed genes: 1761 (10.6%) \n", + "\tprocessed genes: 1783 (10.7%) \n", + "\tprocessed genes: 1821 (10.9%) \n", + "\tprocessed genes: 1862 (11.2%) \n", + "\tprocessed genes: 1906 (11.5%) \n", + "\tprocessed genes: 1916 (11.5%) \n", + "\tprocessed genes: 1927 (11.6%) \n", + "\tprocessed genes: 1942 (11.7%) \n", + "\tprocessed genes: 1973 (11.9%) \n", + "\tprocessed genes: 1986 (11.9%) \n", + "\tprocessed genes: 2005 (12.1%) \n", + "\tprocessed genes: 2019 (12.1%) \n", + "\tprocessed genes: 2062 (12.4%) \n", + "\tprocessed genes: 2103 (12.6%) \n", + "\tprocessed genes: 2138 (12.9%) \n", + "\tprocessed genes: 2147 (12.9%) \n", + "\tprocessed genes: 2228 (13.4%) \n", + "\tprocessed genes: 2254 (13.5%) \n", + "\tprocessed genes: 2285 (13.7%) \n", + "\tprocessed genes: 2313 (13.9%) \n", + "\tprocessed genes: 2346 (14.1%) \n", + "\tprocessed genes: 2374 (14.3%) \n", + "\tprocessed genes: 2391 (14.4%) \n", + "\tprocessed genes: 2412 (14.5%) \n", + "\tprocessed genes: 
2431 (14.6%) \n", + "\tprocessed genes: 2450 (14.7%) \n", + "\tprocessed genes: 2462 (14.8%) \n", + "\tprocessed genes: 2480 (14.9%) \n", + "\tprocessed genes: 2502 (15%) \n", + "\tprocessed genes: 2530 (15.2%) \n", + "\tprocessed genes: 2551 (15.3%) \n", + "\tprocessed genes: 2585 (15.5%) \n", + "\tprocessed genes: 2629 (15.8%) \n", + "\tprocessed genes: 2644 (15.9%) \n", + "\tprocessed genes: 2668 (16%) \n", + "\tprocessed genes: 2686 (16.1%) \n", + "\tprocessed genes: 2737 (16.5%) \n", + "\tprocessed genes: 2760 (16.6%) \n", + "\tprocessed genes: 2776 (16.7%) \n", + "\tprocessed genes: 2806 (16.9%) \n", + "\tprocessed genes: 2818 (16.9%) \n", + "\tprocessed genes: 2824 (17%) \n", + "\tprocessed genes: 2842 (17.1%) \n", + "\tprocessed genes: 2875 (17.3%) \n", + "\tprocessed genes: 2881 (17.3%) \n", + "\tprocessed genes: 2892 (17.4%) \n", + "\tprocessed genes: 2928 (17.6%) \n", + "\tprocessed genes: 2945 (17.7%) \n", + "\tprocessed genes: 2957 (17.8%) \n", + "\tprocessed genes: 2975 (17.9%) \n", + "\tprocessed genes: 2984 (17.9%) \n", + "\tprocessed genes: 3002 (18%) \n", + "\tprocessed genes: 3025 (18.2%) \n", + "\tprocessed genes: 3058 (18.4%) \n", + "\tprocessed genes: 3076 (18.5%) \n", + "\tprocessed genes: 3115 (18.7%) \n", + "\tprocessed genes: 3167 (19%) \n", + "\tprocessed genes: 3207 (19.3%) \n", + "\tprocessed genes: 3249 (19.5%) \n", + "\tprocessed genes: 3254 (19.6%) \n", + "\tprocessed genes: 3266 (19.6%) \n", + "\tprocessed genes: 3281 (19.7%) \n", + "\tprocessed genes: 3285 (19.7%) \n", + "\tprocessed genes: 3296 (19.8%) \n", + "\tprocessed genes: 3314 (19.9%) \n", + "\tprocessed genes: 3317 (19.9%) \n", + "\tprocessed genes: 3366 (20.2%) \n", + "\tprocessed genes: 3406 (20.5%) \n", + "\tprocessed genes: 3424 (20.6%) \n", + "\tprocessed genes: 3462 (20.8%) \n", + "\tprocessed genes: 3479 (20.9%) \n", + "\tprocessed genes: 3495 (21%) \n", + "\tprocessed genes: 3530 (21.2%) \n", + "\tprocessed genes: 3543 (21.3%) \n", + "\tprocessed genes: 3562 
(21.4%) \n", + "\tprocessed genes: 3593 (21.6%) \n", + "\tprocessed genes: 3610 (21.7%) \n", + "\tprocessed genes: 3654 (22%) \n", + "\tprocessed genes: 3675 (22.1%) \n", + "\tprocessed genes: 3697 (22.2%) \n", + "\tprocessed genes: 3722 (22.4%) \n", + "\tprocessed genes: 3724 (22.4%) \n", + "\tprocessed genes: 3775 (22.7%) \n", + "\tprocessed genes: 3795 (22.8%) \n", + "\tprocessed genes: 3802 (22.9%) \n", + "\tprocessed genes: 3810 (22.9%) \n", + "\tprocessed genes: 3827 (23%) \n", + "\tprocessed genes: 3857 (23.2%) \n", + "\tprocessed genes: 3906 (23.5%) \n", + "\tprocessed genes: 3921 (23.6%) \n", + "\tprocessed genes: 3927 (23.6%) \n", + "\tprocessed genes: 3935 (23.7%) \n", + "\tprocessed genes: 3945 (23.7%) \n", + "\tprocessed genes: 3967 (23.8%) \n", + "\tprocessed genes: 3978 (23.9%) \n", + "\tprocessed genes: 3996 (24%) \n", + "\tprocessed genes: 4024 (24.2%) \n", + "\tprocessed genes: 4035 (24.3%) \n", + "\tprocessed genes: 4070 (24.5%) \n", + "\tprocessed genes: 4097 (24.6%) \n", + "\tprocessed genes: 4152 (25%) \n", + "\tprocessed genes: 4179 (25.1%) \n", + "\tprocessed genes: 4208 (25.3%) \n", + "\tprocessed genes: 4233 (25.4%) \n", + "\tprocessed genes: 4240 (25.5%) \n", + "\tprocessed genes: 4270 (25.7%) \n", + "\tprocessed genes: 4293 (25.8%) \n", + "\tprocessed genes: 4313 (25.9%) \n", + "\tprocessed genes: 4371 (26.3%) \n", + "\tprocessed genes: 4402 (26.5%) \n", + "\tprocessed genes: 4418 (26.6%) \n", + "\tprocessed genes: 4447 (26.7%) \n", + "\tprocessed genes: 4466 (26.8%) \n", + "\tprocessed genes: 4483 (26.9%) \n", + "\tprocessed genes: 4503 (27.1%) \n", + "\tprocessed genes: 4534 (27.3%) \n", + "\tprocessed genes: 4570 (27.5%) \n", + "\tprocessed genes: 4584 (27.6%) \n", + "\tprocessed genes: 4592 (27.6%) \n", + "\tprocessed genes: 4611 (27.7%) \n", + "\tprocessed genes: 4637 (27.9%) \n", + "\tprocessed genes: 4676 (28.1%) \n", + "\tprocessed genes: 4705 (28.3%) \n", + "\tprocessed genes: 4748 (28.5%) \n", + "\tprocessed genes: 4769 (28.7%) 
\n", + "\tprocessed genes: 4785 (28.8%) \n", + "\tprocessed genes: 4798 (28.8%) \n", + "\tprocessed genes: 4820 (29%) \n", + "\tprocessed genes: 4857 (29.2%) \n", + "\tprocessed genes: 4881 (29.3%) \n", + "\tprocessed genes: 4913 (29.5%) \n", + "\tprocessed genes: 4958 (29.8%) \n", + "\tprocessed genes: 4988 (30%) \n", + "\tprocessed genes: 5019 (30.2%) \n", + "\tprocessed genes: 5033 (30.3%) \n", + "\tprocessed genes: 5045 (30.3%) \n", + "\tprocessed genes: 5049 (30.3%) \n", + "\tprocessed genes: 5053 (30.4%) \n", + "\tprocessed genes: 5059 (30.4%) \n", + "\tprocessed genes: 5074 (30.5%) \n", + "\tprocessed genes: 5078 (30.5%) \n", + "\tprocessed genes: 5091 (30.6%) \n", + "\tprocessed genes: 5113 (30.7%) \n", + "\tprocessed genes: 5137 (30.9%) \n", + "\tprocessed genes: 5149 (30.9%) \n", + "\tprocessed genes: 5169 (31.1%) \n", + "\tprocessed genes: 5197 (31.2%) \n", + "\tprocessed genes: 5208 (31.3%) \n", + "\tprocessed genes: 5234 (31.5%) \n", + "\tprocessed genes: 5277 (31.7%) \n", + "\tprocessed genes: 5298 (31.8%) \n", + "\tprocessed genes: 5324 (32%) \n", + "\tprocessed genes: 5342 (32.1%) \n", + "\tprocessed genes: 5357 (32.2%) \n", + "\tprocessed genes: 5375 (32.3%) \n", + "\tprocessed genes: 5395 (32.4%) \n", + "\tprocessed genes: 5420 (32.6%) \n", + "\tprocessed genes: 5468 (32.9%) \n", + "\tprocessed genes: 5493 (33%) \n", + "\tprocessed genes: 5511 (33.1%) \n", + "\tprocessed genes: 5522 (33.2%) \n", + "\tprocessed genes: 5533 (33.3%) \n", + "\tprocessed genes: 5541 (33.3%) \n", + "\tprocessed genes: 5546 (33.3%) \n", + "\tprocessed genes: 5553 (33.4%) \n", + "\tprocessed genes: 5563 (33.4%) \n", + "\tprocessed genes: 5578 (33.5%) \n", + "\tprocessed genes: 5590 (33.6%) \n", + "\tprocessed genes: 5598 (33.6%) \n", + "\tprocessed genes: 5609 (33.7%) \n", + "\tprocessed genes: 5611 (33.7%) \n", + "\tprocessed genes: 5618 (33.8%) \n", + "\tprocessed genes: 5623 (33.8%) \n", + "\tprocessed genes: 5629 (33.8%) \n", + "\tprocessed genes: 5632 (33.9%) \n", + 
"\tprocessed genes: 5645 (33.9%) \n", + "\tprocessed genes: 5655 (34%) \n", + "\tprocessed genes: 5664 (34%) \n", + "\tprocessed genes: 5672 (34.1%) \n", + "\tprocessed genes: 5686 (34.2%) \n", + "\tprocessed genes: 5698 (34.2%) \n", + "\tprocessed genes: 5709 (34.3%) \n", + "\tprocessed genes: 5720 (34.4%) \n", + "\tprocessed genes: 5749 (34.6%) \n", + "\tprocessed genes: 5792 (34.8%) \n", + "\tprocessed genes: 5805 (34.9%) \n", + "\tprocessed genes: 5843 (35.1%) \n", + "\tprocessed genes: 5864 (35.2%) \n", + "\tprocessed genes: 5885 (35.4%) \n", + "\tprocessed genes: 5932 (35.7%) \n", + "\tprocessed genes: 5969 (35.9%) \n", + "\tprocessed genes: 6004 (36.1%) \n", + "\tprocessed genes: 6043 (36.3%) \n", + "\tprocessed genes: 6061 (36.4%) \n", + "\tprocessed genes: 6081 (36.6%) \n", + "\tprocessed genes: 6111 (36.7%) \n", + "\tprocessed genes: 6138 (36.9%) \n", + "\tprocessed genes: 6155 (37%) \n", + "\tprocessed genes: 6174 (37.1%) \n", + "\tprocessed genes: 6183 (37.2%) \n", + "\tprocessed genes: 6194 (37.2%) \n", + "\tprocessed genes: 6203 (37.3%) \n", + "\tprocessed genes: 6227 (37.4%) \n", + "\tprocessed genes: 6230 (37.4%) \n", + "\tprocessed genes: 6240 (37.5%) \n", + "\tprocessed genes: 6281 (37.8%) \n", + "\tprocessed genes: 6293 (37.8%) \n", + "\tprocessed genes: 6317 (38%) \n", + "\tprocessed genes: 6332 (38.1%) \n", + "\tprocessed genes: 6342 (38.1%) \n", + "\tprocessed genes: 6356 (38.2%) \n", + "\tprocessed genes: 6377 (38.3%) \n", + "\tprocessed genes: 6405 (38.5%) \n", + "\tprocessed genes: 6426 (38.6%) \n", + "\tprocessed genes: 6438 (38.7%) \n", + "\tprocessed genes: 6452 (38.8%) \n", + "\tprocessed genes: 6464 (38.9%) \n", + "\tprocessed genes: 6499 (39.1%) \n", + "\tprocessed genes: 6518 (39.2%) \n", + "\tprocessed genes: 6564 (39.5%) \n", + "\tprocessed genes: 6605 (39.7%) \n", + "\tprocessed genes: 6606 (39.7%) \n", + "\tprocessed genes: 6616 (39.8%) \n", + "\tprocessed genes: 6649 (40%) \n", + "\tprocessed genes: 6700 (40.3%) \n", + 
"\tprocessed genes: 6751 (40.6%) \n", + "\tprocessed genes: 6770 (40.7%) \n", + "\tprocessed genes: 6782 (40.8%) \n", + "\tprocessed genes: 6799 (40.9%) \n", + "\tprocessed genes: 6832 (41.1%) \n", + "\tprocessed genes: 6889 (41.4%) \n", + "\tprocessed genes: 6900 (41.5%) \n", + "\tprocessed genes: 6921 (41.6%) \n", + "\tprocessed genes: 6941 (41.7%) \n", + "\tprocessed genes: 6955 (41.8%) \n", + "\tprocessed genes: 6984 (42%) \n", + "\tprocessed genes: 7037 (42.3%) \n", + "\tprocessed genes: 7056 (42.4%) \n", + "\tprocessed genes: 7065 (42.5%) \n", + "\tprocessed genes: 7071 (42.5%) \n", + "\tprocessed genes: 7088 (42.6%) \n", + "\tprocessed genes: 7106 (42.7%) \n", + "\tprocessed genes: 7108 (42.7%) \n", + "\tprocessed genes: 7124 (42.8%) \n", + "\tprocessed genes: 7143 (42.9%) \n", + "\tprocessed genes: 7163 (43.1%) \n", + "\tprocessed genes: 7182 (43.2%) \n", + "\tprocessed genes: 7206 (43.3%) \n", + "\tprocessed genes: 7223 (43.4%) \n", + "\tprocessed genes: 7258 (43.6%) \n", + "\tprocessed genes: 7285 (43.8%) \n", + "\tprocessed genes: 7318 (44%) \n", + "\tprocessed genes: 7344 (44.1%) \n", + "\tprocessed genes: 7356 (44.2%) \n", + "\tprocessed genes: 7408 (44.5%) \n", + "\tprocessed genes: 7459 (44.8%) \n", + "\tprocessed genes: 7484 (45%) \n", + "\tprocessed genes: 7502 (45.1%) \n", + "\tprocessed genes: 7524 (45.2%) \n", + "\tprocessed genes: 7553 (45.4%) \n", + "\tprocessed genes: 7567 (45.5%) \n", + "\tprocessed genes: 7575 (45.5%) \n", + "\tprocessed genes: 7617 (45.8%) \n", + "\tprocessed genes: 7657 (46%) \n", + "\tprocessed genes: 7667 (46.1%) \n", + "\tprocessed genes: 7682 (46.2%) \n", + "\tprocessed genes: 7684 (46.2%) \n", + "\tprocessed genes: 7697 (46.3%) \n", + "\tprocessed genes: 7708 (46.3%) \n", + "\tprocessed genes: 7735 (46.5%) \n", + "\tprocessed genes: 7803 (46.9%) \n", + "\tprocessed genes: 7835 (47.1%) \n", + "\tprocessed genes: 7853 (47.2%) \n", + "\tprocessed genes: 7876 (47.3%) \n", + "\tprocessed genes: 7911 (47.6%) \n", + 
"\tprocessed genes: 7942 (47.7%) \n", + "\tprocessed genes: 7967 (47.9%) \n", + "\tprocessed genes: 8006 (48.1%) \n", + "\tprocessed genes: 8022 (48.2%) \n", + "\tprocessed genes: 8046 (48.4%) \n", + "\tprocessed genes: 8059 (48.4%) \n", + "\tprocessed genes: 8061 (48.5%) \n", + "\tprocessed genes: 8099 (48.7%) \n", + "\tprocessed genes: 8133 (48.9%) \n", + "\tprocessed genes: 8184 (49.2%) \n", + "\tprocessed genes: 8196 (49.3%) \n", + "\tprocessed genes: 8207 (49.3%) \n", + "\tprocessed genes: 8229 (49.5%) \n", + "\tprocessed genes: 8234 (49.5%) \n", + "\tprocessed genes: 8248 (49.6%) \n", + "\tprocessed genes: 8269 (49.7%) \n", + "\tprocessed genes: 8306 (49.9%) \n", + "\tprocessed genes: 8316 (50%) \n", + "\tprocessed genes: 8341 (50.1%) \n", + "\tprocessed genes: 8354 (50.2%) \n", + "\tprocessed genes: 8364 (50.3%) \n", + "\tprocessed genes: 8373 (50.3%) \n", + "\tprocessed genes: 8390 (50.4%) \n", + "\tprocessed genes: 8401 (50.5%) \n", + "\tprocessed genes: 8410 (50.5%) \n", + "\tprocessed genes: 8426 (50.6%) \n", + "\tprocessed genes: 8450 (50.8%) \n", + "\tprocessed genes: 8497 (51.1%) \n", + "\tprocessed genes: 8517 (51.2%) \n", + "\tprocessed genes: 8539 (51.3%) \n", + "\tprocessed genes: 8555 (51.4%) \n", + "\tprocessed genes: 8580 (51.6%) \n", + "\tprocessed genes: 8591 (51.6%) \n", + "\tprocessed genes: 8623 (51.8%) \n", + "\tprocessed genes: 8626 (51.8%) \n", + "\tprocessed genes: 8645 (52%) \n", + "\tprocessed genes: 8663 (52.1%) \n", + "\tprocessed genes: 8703 (52.3%) \n", + "\tprocessed genes: 8732 (52.5%) \n", + "\tprocessed genes: 8760 (52.7%) \n", + "\tprocessed genes: 8792 (52.8%) \n", + "\tprocessed genes: 8828 (53.1%) \n", + "\tprocessed genes: 8847 (53.2%) \n", + "\tprocessed genes: 8866 (53.3%) \n", + "\tprocessed genes: 8889 (53.4%) \n", + "\tprocessed genes: 8919 (53.6%) \n", + "\tprocessed genes: 8936 (53.7%) \n", + "\tprocessed genes: 8944 (53.8%) \n", + "\tprocessed genes: 8958 (53.8%) \n", + "\tprocessed genes: 9022 (54.2%) \n", + 
"\tprocessed genes: 9043 (54.4%) \n", + "\tprocessed genes: 9070 (54.5%) \n", + "\tprocessed genes: 9100 (54.7%) \n", + "\tprocessed genes: 9128 (54.9%) \n", + "\tprocessed genes: 9147 (55%) \n", + "\tprocessed genes: 9159 (55.1%) \n", + "\tprocessed genes: 9172 (55.1%) \n", + "\tprocessed genes: 9179 (55.2%) \n", + "\tprocessed genes: 9186 (55.2%) \n", + "\tprocessed genes: 9195 (55.3%) \n", + "\tprocessed genes: 9215 (55.4%) \n", + "\tprocessed genes: 9241 (55.5%) \n", + "\tprocessed genes: 9246 (55.6%) \n", + "\tprocessed genes: 9277 (55.8%) \n", + "\tprocessed genes: 9300 (55.9%) \n", + "\tprocessed genes: 9312 (56%) \n", + "\tprocessed genes: 9363 (56.3%) \n", + "\tprocessed genes: 9466 (56.9%) \n", + "\tprocessed genes: 9503 (57.1%) \n", + "\tprocessed genes: 9543 (57.4%) \n", + "\tprocessed genes: 9575 (57.6%) \n", + "\tprocessed genes: 9625 (57.9%) \n", + "\tprocessed genes: 9674 (58.1%) \n", + "\tprocessed genes: 9697 (58.3%) \n", + "\tprocessed genes: 9719 (58.4%) \n", + "\tprocessed genes: 9750 (58.6%) \n", + "\tprocessed genes: 9774 (58.7%) \n", + "\tprocessed genes: 9784 (58.8%) \n", + "\tprocessed genes: 9792 (58.9%) \n", + "\tprocessed genes: 9831 (59.1%) \n", + "\tprocessed genes: 9845 (59.2%) \n", + "\tprocessed genes: 9882 (59.4%) \n", + "\tprocessed genes: 9923 (59.6%) \n", + "\tprocessed genes: 9950 (59.8%) \n", + "\tprocessed genes: 9981 (60%) \n", + "\tprocessed genes: 10006 (60.1%) \n", + "\tprocessed genes: 10019 (60.2%) \n", + "\tprocessed genes: 10058 (60.5%) \n", + "\tprocessed genes: 10076 (60.6%) \n", + "\tprocessed genes: 10093 (60.7%) \n", + "\tprocessed genes: 10095 (60.7%) \n", + "\tprocessed genes: 10096 (60.7%) \n", + "\tprocessed genes: 10120 (60.8%) \n", + "\tprocessed genes: 10130 (60.9%) \n", + "\tprocessed genes: 10139 (60.9%) \n", + "\tprocessed genes: 10150 (61%) \n", + "\tprocessed genes: 10185 (61.2%) \n", + "\tprocessed genes: 10211 (61.4%) \n", + "\tprocessed genes: 10242 (61.6%) \n", + "\tprocessed genes: 10266 (61.7%) 
\n", + "\tprocessed genes: 10285 (61.8%) \n", + "\tprocessed genes: 10304 (61.9%) \n", + "\tprocessed genes: 10320 (62%) \n", + "\tprocessed genes: 10348 (62.2%) \n", + "\tprocessed genes: 10367 (62.3%) \n", + "\tprocessed genes: 10390 (62.5%) \n", + "\tprocessed genes: 10443 (62.8%) \n", + "\tprocessed genes: 10477 (63%) \n", + "\tprocessed genes: 10507 (63.2%) \n", + "\tprocessed genes: 10544 (63.4%) \n", + "\tprocessed genes: 10593 (63.7%) \n", + "\tprocessed genes: 10622 (63.8%) \n", + "\tprocessed genes: 10649 (64%) \n", + "\tprocessed genes: 10686 (64.2%) \n", + "\tprocessed genes: 10712 (64.4%) \n", + "\tprocessed genes: 10737 (64.5%) \n", + "\tprocessed genes: 10771 (64.7%) \n", + "\tprocessed genes: 10788 (64.8%) \n", + "\tprocessed genes: 10810 (65%) \n", + "\tprocessed genes: 10829 (65.1%) \n", + "\tprocessed genes: 10859 (65.3%) \n", + "\tprocessed genes: 10893 (65.5%) \n", + "\tprocessed genes: 10917 (65.6%) \n", + "\tprocessed genes: 10951 (65.8%) \n", + "\tprocessed genes: 10983 (66%) \n", + "\tprocessed genes: 11001 (66.1%) \n", + "\tprocessed genes: 11007 (66.2%) \n", + "\tprocessed genes: 11011 (66.2%) \n", + "\tprocessed genes: 11037 (66.3%) \n", + "\tprocessed genes: 11069 (66.5%) \n", + "\tprocessed genes: 11092 (66.7%) \n", + "\tprocessed genes: 11103 (66.7%) \n", + "\tprocessed genes: 11118 (66.8%) \n", + "\tprocessed genes: 11142 (67%) \n", + "\tprocessed genes: 11170 (67.1%) \n", + "\tprocessed genes: 11205 (67.3%) \n", + "\tprocessed genes: 11231 (67.5%) \n", + "\tprocessed genes: 11252 (67.6%) \n", + "\tprocessed genes: 11261 (67.7%) \n", + "\tprocessed genes: 11277 (67.8%) \n", + "\tprocessed genes: 11285 (67.8%) \n", + "\tprocessed genes: 11297 (67.9%) \n", + "\tprocessed genes: 11310 (68%) \n", + "\tprocessed genes: 11331 (68.1%) \n", + "\tprocessed genes: 11387 (68.4%) \n", + "\tprocessed genes: 11423 (68.7%) \n", + "\tprocessed genes: 11463 (68.9%) \n", + "\tprocessed genes: 11478 (69%) \n", + "\tprocessed genes: 11518 (69.2%) \n", + 
"\tprocessed genes: 11559 (69.5%) \n", + "\tprocessed genes: 11597 (69.7%) \n", + "\tprocessed genes: 11618 (69.8%) \n", + "\tprocessed genes: 11640 (70%) \n", + "\tprocessed genes: 11664 (70.1%) \n", + "\tprocessed genes: 11698 (70.3%) \n", + "\tprocessed genes: 11724 (70.5%) \n", + "\tprocessed genes: 11736 (70.5%) \n", + "\tprocessed genes: 11757 (70.7%) \n", + "\tprocessed genes: 11774 (70.8%) \n", + "\tprocessed genes: 11797 (70.9%) \n", + "\tprocessed genes: 11815 (71%) \n", + "\tprocessed genes: 11853 (71.2%) \n", + "\tprocessed genes: 11892 (71.5%) \n", + "\tprocessed genes: 11900 (71.5%) \n", + "\tprocessed genes: 11912 (71.6%) \n", + "\tprocessed genes: 11949 (71.8%) \n", + "\tprocessed genes: 11996 (72.1%) \n", + "\tprocessed genes: 12036 (72.3%) \n", + "\tprocessed genes: 12064 (72.5%) \n", + "\tprocessed genes: 12086 (72.6%) \n", + "\tprocessed genes: 12100 (72.7%) \n", + "\tprocessed genes: 12114 (72.8%) \n", + "\tprocessed genes: 12139 (73%) \n", + "\tprocessed genes: 12166 (73.1%) \n", + "\tprocessed genes: 12179 (73.2%) \n", + "\tprocessed genes: 12188 (73.3%) \n", + "\tprocessed genes: 12236 (73.5%) \n", + "\tprocessed genes: 12264 (73.7%) \n", + "\tprocessed genes: 12283 (73.8%) \n", + "\tprocessed genes: 12305 (74%) \n", + "\tprocessed genes: 12317 (74%) \n", + "\tprocessed genes: 12341 (74.2%) \n", + "\tprocessed genes: 12360 (74.3%) \n", + "\tprocessed genes: 12373 (74.4%) \n", + "\tprocessed genes: 12387 (74.5%) \n", + "\tprocessed genes: 12451 (74.8%) \n", + "\tprocessed genes: 12504 (75.2%) \n", + "\tprocessed genes: 12522 (75.3%) \n", + "\tprocessed genes: 12540 (75.4%) \n", + "\tprocessed genes: 12550 (75.4%) \n", + "\tprocessed genes: 12567 (75.5%) \n", + "\tprocessed genes: 12582 (75.6%) \n", + "\tprocessed genes: 12598 (75.7%) \n", + "\tprocessed genes: 12620 (75.9%) \n", + "\tprocessed genes: 12647 (76%) \n", + "\tprocessed genes: 12666 (76.1%) \n", + "\tprocessed genes: 12676 (76.2%) \n", + "\tprocessed genes: 12746 (76.6%) \n", + 
"\tprocessed genes: 12790 (76.9%) \n", + "\tprocessed genes: 12830 (77.1%) \n", + "\tprocessed genes: 12848 (77.2%) \n", + "\tprocessed genes: 12927 (77.7%) \n", + "\tprocessed genes: 12958 (77.9%) \n", + "\tprocessed genes: 12972 (78%) \n", + "\tprocessed genes: 12987 (78.1%) \n", + "\tprocessed genes: 12988 (78.1%) \n", + "\tprocessed genes: 13000 (78.1%) \n", + "\tprocessed genes: 13005 (78.2%) \n", + "\tprocessed genes: 13019 (78.3%) \n", + "\tprocessed genes: 13026 (78.3%) \n", + "\tprocessed genes: 13045 (78.4%) \n", + "\tprocessed genes: 13065 (78.5%) \n", + "\tprocessed genes: 13095 (78.7%) \n", + "\tprocessed genes: 13122 (78.9%) \n", + "\tprocessed genes: 13141 (79%) \n", + "\tprocessed genes: 13154 (79.1%) \n", + "\tprocessed genes: 13178 (79.2%) \n", + "\tprocessed genes: 13196 (79.3%) \n", + "\tprocessed genes: 13218 (79.4%) \n", + "\tprocessed genes: 13250 (79.6%) \n", + "\tprocessed genes: 13274 (79.8%) \n", + "\tprocessed genes: 13281 (79.8%) \n", + "\tprocessed genes: 13288 (79.9%) \n", + "\tprocessed genes: 13298 (79.9%) \n", + "\tprocessed genes: 13305 (80%) \n", + "\tprocessed genes: 13340 (80.2%) \n", + "\tprocessed genes: 13380 (80.4%) \n", + "\tprocessed genes: 13450 (80.8%) \n", + "\tprocessed genes: 13453 (80.9%) \n", + "\tprocessed genes: 13499 (81.1%) \n", + "\tprocessed genes: 13530 (81.3%) \n", + "\tprocessed genes: 13579 (81.6%) \n", + "\tprocessed genes: 13620 (81.9%) \n", + "\tprocessed genes: 13659 (82.1%) \n", + "\tprocessed genes: 13684 (82.3%) \n", + "\tprocessed genes: 13717 (82.4%) \n", + "\tprocessed genes: 13758 (82.7%) \n", + "\tprocessed genes: 13779 (82.8%) \n", + "\tprocessed genes: 13818 (83.1%) \n", + "\tprocessed genes: 13840 (83.2%) \n", + "\tprocessed genes: 13874 (83.4%) \n", + "\tprocessed genes: 13885 (83.5%) \n", + "\tprocessed genes: 13903 (83.6%) \n", + "\tprocessed genes: 13957 (83.9%) \n", + "\tprocessed genes: 13989 (84.1%) \n", + "\tprocessed genes: 14001 (84.2%) \n", + "\tprocessed genes: 14011 (84.2%) 
\n", + "\tprocessed genes: 14022 (84.3%) \n", + "\tprocessed genes: 14035 (84.4%) \n", + "\tprocessed genes: 14068 (84.6%) \n", + "\tprocessed genes: 14080 (84.6%) \n", + "\tprocessed genes: 14097 (84.7%) \n", + "\tprocessed genes: 14105 (84.8%) \n", + "\tprocessed genes: 14119 (84.9%) \n", + "\tprocessed genes: 14138 (85%) \n", + "\tprocessed genes: 14171 (85.2%) \n", + "\tprocessed genes: 14204 (85.4%) \n", + "\tprocessed genes: 14222 (85.5%) \n", + "\tprocessed genes: 14235 (85.6%) \n", + "\tprocessed genes: 14254 (85.7%) \n", + "\tprocessed genes: 14273 (85.8%) \n", + "\tprocessed genes: 14297 (85.9%) \n", + "\tprocessed genes: 14323 (86.1%) \n", + "\tprocessed genes: 14380 (86.4%) \n", + "\tprocessed genes: 14418 (86.7%) \n", + "\tprocessed genes: 14439 (86.8%) \n", + "\tprocessed genes: 14461 (86.9%) \n", + "\tprocessed genes: 14482 (87%) \n", + "\tprocessed genes: 14492 (87.1%) \n", + "\tprocessed genes: 14514 (87.2%) \n", + "\tprocessed genes: 14524 (87.3%) \n", + "\tprocessed genes: 14541 (87.4%) \n", + "\tprocessed genes: 14559 (87.5%) \n", + "\tprocessed genes: 14571 (87.6%) \n", + "\tprocessed genes: 14598 (87.7%) \n", + "\tprocessed genes: 14622 (87.9%) \n", + "\tprocessed genes: 14652 (88.1%) \n", + "\tprocessed genes: 14686 (88.3%) \n", + "\tprocessed genes: 14714 (88.4%) \n", + "\tprocessed genes: 14735 (88.6%) \n", + "\tprocessed genes: 14759 (88.7%) \n", + "\tprocessed genes: 14778 (88.8%) \n", + "\tprocessed genes: 14800 (89%) \n", + "\tprocessed genes: 14826 (89.1%) \n", + "\tprocessed genes: 14855 (89.3%) \n", + "\tprocessed genes: 14902 (89.6%) \n", + "\tprocessed genes: 14960 (89.9%) \n", + "\tprocessed genes: 14991 (90.1%) \n", + "\tprocessed genes: 15020 (90.3%) \n", + "\tprocessed genes: 15056 (90.5%) \n", + "\tprocessed genes: 15083 (90.7%) \n", + "\tprocessed genes: 15127 (90.9%) \n", + "\tprocessed genes: 15155 (91.1%) \n", + "\tprocessed genes: 15182 (91.3%) \n", + "\tprocessed genes: 15211 (91.4%) \n", + "\tprocessed genes: 15235 
(91.6%) \n", + "\tprocessed genes: 15255 (91.7%) \n", + "\tprocessed genes: 15279 (91.8%) \n", + "\tprocessed genes: 15308 (92%) \n", + "\tprocessed genes: 15325 (92.1%) \n", + "\tprocessed genes: 15346 (92.2%) \n", + "\tprocessed genes: 15371 (92.4%) \n", + "\tprocessed genes: 15390 (92.5%) \n", + "\tprocessed genes: 15405 (92.6%) \n", + "\tprocessed genes: 15421 (92.7%) \n", + "\tprocessed genes: 15432 (92.8%) \n", + "\tprocessed genes: 15460 (92.9%) \n", + "\tprocessed genes: 15476 (93%) \n", + "\tprocessed genes: 15501 (93.2%) \n", + "\tprocessed genes: 15508 (93.2%) \n", + "\tprocessed genes: 15520 (93.3%) \n", + "\tprocessed genes: 15537 (93.4%) \n", + "\tprocessed genes: 15564 (93.6%) \n", + "\tprocessed genes: 15603 (93.8%) \n", + "\tprocessed genes: 15639 (94%) \n", + "\tprocessed genes: 15660 (94.1%) \n", + "\tprocessed genes: 15672 (94.2%) \n", + "\tprocessed genes: 15683 (94.3%) \n", + "\tprocessed genes: 15688 (94.3%) \n", + "\tprocessed genes: 15711 (94.4%) \n", + "\tprocessed genes: 15735 (94.6%) \n", + "\tprocessed genes: 15808 (95%) \n", + "\tprocessed genes: 15842 (95.2%) \n", + "\tprocessed genes: 15862 (95.3%) \n", + "\tprocessed genes: 15921 (95.7%) \n", + "\tprocessed genes: 15932 (95.8%) \n", + "\tprocessed genes: 15951 (95.9%) \n", + "\tprocessed genes: 15962 (95.9%) \n", + "\tprocessed genes: 16000 (96.2%) \n", + "\tprocessed genes: 16004 (96.2%) \n", + "\tprocessed genes: 16032 (96.4%) \n", + "\tprocessed genes: 16065 (96.6%) \n", + "\tprocessed genes: 16098 (96.8%) \n", + "\tprocessed genes: 16129 (96.9%) \n", + "\tprocessed genes: 16146 (97%) \n", + "\tprocessed genes: 16156 (97.1%) \n", + "\tprocessed genes: 16167 (97.2%) \n", + "\tprocessed genes: 16179 (97.2%) \n", + "\tprocessed genes: 16198 (97.4%) \n", + "\tprocessed genes: 16221 (97.5%) \n", + "\tprocessed genes: 16234 (97.6%) \n", + "\tprocessed genes: 16264 (97.8%) \n", + "\tprocessed genes: 16307 (98%) \n", + "\tprocessed genes: 16343 (98.2%) \n", + "\tprocessed genes: 16353 
(98.3%) \n", + "\tprocessed genes: 16380 (98.5%) \n", + "\tprocessed genes: 16417 (98.7%) \n", + "\tprocessed genes: 16430 (98.8%) \n", + "\tprocessed genes: 16462 (98.9%) \n", + "\tprocessed genes: 16505 (99.2%) \n", + "\tprocessed genes: 16529 (99.4%) \n", + "\tprocessed genes: 16561 (99.5%) \n", + "\tprocessed genes: 16573 (99.6%) \n", + "\tprocessed genes: 16580 (99.7%) \n", + "\tprocessed genes: 16595 (99.7%) \n", + "\tprocessed genes: 16600 (99.8%) \n", + "\tprocessed genes: 16632 (100%) \n", + " \n", + "\twriting gene analysis results to file ./data//model_pred/new_experiments/test_magma_out.genes.out\n", + "\twriting intermediate output to file ./data//model_pred/new_experiments/test_magma_out.genes.raw\n", + "\n", + "\n", + "End time is 21:34:07, Monday 25 Nov 2024 (elapsed: 00:26:36)\n", + "\n" + ] + } + ], + "source": [ + "import sys\n", + "sys.path.append('../')\n", + "from kgwas import KGWAS, KGWAS_Data\n", + "data = KGWAS_Data(data_path = './data/')\n", + "data.load_kg()\n", + "\n", + "data.load_external_gwas(example_file = True)\n", + "data.process_gwas_file()\n", + "data.prepare_split()\n", + "\n", + "run = KGWAS(data, device = 'cuda:7', exp_name = 'test')\n", + "run.load_pretrained('./data/model/test')\n", + "run.run_magma(path_to_magma = \"/dfs/user/kexinh/ggwas/magma\", bfile = \"/dfs/project/datasets/20220524-ukbiobank/data/genetics/ukb_sample_0.1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is an example output of the MAGMA run:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + 
" <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>GENE</th>\n", + " <th>CHR</th>\n", + " <th>START</th>\n", + " <th>STOP</th>\n", + " <th>NSNPS</th>\n", + " <th>NPARAM</th>\n", + " <th>N</th>\n", + " <th>ZSTAT</th>\n", + " <th>P</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>148398</td>\n", + " <td>1</td>\n", + " <td>859993</td>\n", + " <td>879961</td>\n", + " <td>6</td>\n", + " <td>4</td>\n", + " <td>9988</td>\n", + " <td>-1.10670</td>\n", + " <td>0.865800</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>26155</td>\n", + " <td>1</td>\n", + " <td>879583</td>\n", + " <td>894679</td>\n", + " <td>9</td>\n", + " <td>4</td>\n", + " <td>9988</td>\n", + " <td>0.36490</td>\n", + " <td>0.357590</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>339451</td>\n", + " <td>1</td>\n", + " <td>895967</td>\n", + " <td>901099</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>9988</td>\n", + " <td>1.40540</td>\n", + " <td>0.079955</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>84069</td>\n", + " <td>1</td>\n", + " <td>901872</td>\n", + " <td>910488</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>9988</td>\n", + " <td>1.56410</td>\n", + " <td>0.058896</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>84808</td>\n", + " <td>1</td>\n", + " <td>910579</td>\n", + " <td>917473</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>9988</td>\n", + " <td>1.25460</td>\n", + " <td>0.104820</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16632</th>\n", + " <td>23542</td>\n", + " <td>22</td>\n", + " <td>51039131</td>\n", + " <td>51049979</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>9988</td>\n", + " <td>1.15830</td>\n", + " 
<td>0.123360</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16633</th>\n", + " <td>410</td>\n", + " <td>22</td>\n", + " <td>51061182</td>\n", + " <td>51066601</td>\n", + " <td>9</td>\n", + " <td>5</td>\n", + " <td>9988</td>\n", + " <td>-0.65834</td>\n", + " <td>0.744840</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16634</th>\n", + " <td>85358</td>\n", + " <td>22</td>\n", + " <td>51113070</td>\n", + " <td>51171640</td>\n", + " <td>41</td>\n", + " <td>15</td>\n", + " <td>9988</td>\n", + " <td>-1.18500</td>\n", + " <td>0.881990</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16635</th>\n", + " <td>101928892</td>\n", + " <td>22</td>\n", + " <td>51123086</td>\n", + " <td>51125473</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>9988</td>\n", + " <td>-0.14590</td>\n", + " <td>0.558000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16636</th>\n", + " <td>49</td>\n", + " <td>22</td>\n", + " <td>51176652</td>\n", + " <td>51183727</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>9988</td>\n", + " <td>-0.42425</td>\n", + " <td>0.664310</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>16637 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " GENE CHR START STOP NSNPS NPARAM N ZSTAT \\\n", + "0 148398 1 859993 879961 6 4 9988 -1.10670 \n", + "1 26155 1 879583 894679 9 4 9988 0.36490 \n", + "2 339451 1 895967 901099 1 1 9988 1.40540 \n", + "3 84069 1 901872 910488 3 2 9988 1.56410 \n", + "4 84808 1 910579 917473 2 1 9988 1.25460 \n", + "... ... ... ... ... ... ... ... ... \n", + "16632 23542 22 51039131 51049979 3 2 9988 1.15830 \n", + "16633 410 22 51061182 51066601 9 5 9988 -0.65834 \n", + "16634 85358 22 51113070 51171640 41 15 9988 -1.18500 \n", + "16635 101928892 22 51123086 51125473 2 1 9988 -0.14590 \n", + "16636 49 22 51176652 51183727 1 1 9988 -0.42425 \n", + "\n", + " P \n", + "0 0.865800 \n", + "1 0.357590 \n", + "2 0.079955 \n", + "3 0.058896 \n", + "4 0.104820 \n", + "... ... 
\n", + "16632 0.123360 \n", + "16633 0.744840 \n", + "16634 0.881990 \n", + "16635 0.558000 \n", + "16636 0.664310 \n", + "\n", + "[16637 rows x 9 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "pd.read_csv('./data/model_pred/new_experiments/test_magma_out.genes.out', sep = r'\\s+')  # raw string: \\s+ is a regex, not a string escape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To find out which gene each ID in the `GENE` column refers to, look it up in the following gene location file:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>79501</td>\n", + " <td>1</td>\n", + " <td>69091</td>\n", + " <td>70008</td>\n", + " <td>+</td>\n", + " <td>OR4F5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>100996442</td>\n", + " <td>1</td>\n", + " <td>142447</td>\n", + " <td>174392</td>\n", + " <td>-</td>\n", + " <td>LOC100996442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>729759</td>\n", + " <td>1</td>\n", + " <td>367659</td>\n", + " <td>368597</td>\n", + " <td>+</td>\n", + " <td>OR4F29</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>81399</td>\n", + " <td>1</td>\n", + " <td>621096</td>\n", + " <td>622034</td>\n", + " <td>-</td>\n", + " <td>OR4F16</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + "
<td>148398</td>\n", + " <td>1</td>\n", + " <td>859993</td>\n", + " <td>879961</td>\n", + " <td>+</td>\n", + " <td>SAMD11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19422</th>\n", + " <td>442867</td>\n", + " <td>Y</td>\n", + " <td>26764151</td>\n", + " <td>26785354</td>\n", + " <td>+</td>\n", + " <td>BPY2B</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19423</th>\n", + " <td>57054</td>\n", + " <td>Y</td>\n", + " <td>26909216</td>\n", + " <td>26959639</td>\n", + " <td>-</td>\n", + " <td>DAZ3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19424</th>\n", + " <td>57135</td>\n", + " <td>Y</td>\n", + " <td>26979967</td>\n", + " <td>27053187</td>\n", + " <td>+</td>\n", + " <td>DAZ4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19425</th>\n", + " <td>442868</td>\n", + " <td>Y</td>\n", + " <td>27177048</td>\n", + " <td>27198251</td>\n", + " <td>-</td>\n", + " <td>BPY2C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19426</th>\n", + " <td>9085</td>\n", + " <td>Y</td>\n", + " <td>27768264</td>\n", + " <td>27771049</td>\n", + " <td>+</td>\n", + " <td>CDY1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>19427 rows × 6 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5\n", + "0 79501 1 69091 70008 + OR4F5\n", + "1 100996442 1 142447 174392 - LOC100996442\n", + "2 729759 1 367659 368597 + OR4F29\n", + "3 81399 1 621096 622034 - OR4F16\n", + "4 148398 1 859993 879961 + SAMD11\n", + "... ... .. ... ... .. 
...\n", + "19422 442867 Y 26764151 26785354 + BPY2B\n", + "19423 57054 Y 26909216 26959639 - DAZ3\n", + "19424 57135 Y 26979967 27053187 + DAZ4\n", + "19425 442868 Y 27177048 27198251 - BPY2C\n", + "19426 9085 Y 27768264 27771049 + CDY1\n", + "\n", + "[19427 rows x 6 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv('./data/misc_data/NCBI37.3.gene.loc', sep='\\t', header=None)  # no header row; columns look like: gene ID, chr, start, stop, strand, symbol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "a100_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}