Diff of /modas/gwas_cmd.py [000000] .. [a43cea]

Switch to unified view

a b/modas/gwas_cmd.py
1
import pandas as pd
2
import numpy as np
3
from sklearn.decomposition import PCA
4
from rpy2.robjects import pandas2ri
5
from rpy2.rinterface_lib.embedded import RRuntimeError
6
import rpy2.robjects as robjects
7
from rpy2.robjects.packages import importr
8
from rpy2.rinterface_lib.callbacks import logger as rpy2_logger
9
import subprocess
10
import logging
11
import glob, os
12
import shutil
13
import re
14
15
# Enable rpy2's pandas <-> R conversion before the R packages below are loaded.
pandas2ri.activate()
# Restrict rpy2's own logger to errors.
rpy2_logger.setLevel(logging.ERROR)
# Load the R packages used by this module (same order as the individual imports).
rMVP, base, data_table, bigmemory = (
    importr(r_package) for r_package in ('rMVP', 'base', 'data.table', 'bigmemory')
)
21
22
23
def utils_dir_from_locate(locate_output):
    """Pick the installed ``modas/utils`` directory out of ``locate`` output.

    The first line that mentions ``site-packages`` is used. If that line does
    not itself end in ``utils`` (i.e. it names an entry inside the directory),
    its last path component is dropped.

    Parameters
    ----------
    locate_output : str
        Newline-separated paths as printed by ``locate modas/utils``.

    Returns
    -------
    str or None
        The directory path, or ``None`` when no line mentions
        ``site-packages``.
    """
    for hit in locate_output.split('\n'):
        if 'site-packages' not in hit:
            continue
        if hit.endswith('utils'):
            return hit
        return '/'.join(hit.split('/')[:-1])
    return None


# Ask the system `locate` database where modas/utils is installed. A missing
# binary or an empty/absent database must not make this module unimportable.
try:
    _locate_output = subprocess.check_output(['locate', 'modas/utils'], encoding='utf-8')
except (subprocess.CalledProcessError, OSError):
    _locate_output = ''
utils_path = utils_dir_from_locate(_locate_output)
if utils_path is None:
    # Fallback: assumes the `utils` directory ships next to this module
    # (modas/utils beside modas/gwas_cmd.py) -- TODO confirm against packaging.
    utils_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'utils')
28
29
30
def gemma_cmd(model, geno_prefix, kin_prefix, n, out_prefix):
    """Build the shell command line for one GEMMA run.

    The executable is taken to be ``gemma`` inside the module-level
    ``utils_path`` directory.

    Parameters
    ----------
    model : str
        'LM' or 'MLM'.
    geno_prefix : str
        Value passed to ``-bfile``.
    kin_prefix : str
        Used only for 'MLM': the kinship file is ``./output/<kin_prefix>.cXX.txt``.
    n : int or str
        Used only for 'MLM': value passed to ``-n``.
    out_prefix : str
        Value passed to ``-o``.

    Returns
    -------
    str or None
        The command string, or ``None`` when ``model`` is neither 'LM' nor 'MLM'.
    """
    command = None
    if model == 'LM':
        lm_args = '/gemma -bfile {0} -lm  -o {1}'.format(geno_prefix, out_prefix)
        command = utils_path + lm_args
    elif model == 'MLM':
        mlm_args = '/gemma -bfile {0} -k ./output/{1}.cXX.txt -lmm -n {2} -o {3}'.format(
            geno_prefix, kin_prefix, n, out_prefix)
        command = utils_path + mlm_args
    return command
35
36
37
def _mvp_result_frame(mvp, results_key, pvalue_column):
    """Join the SNP map returned by rMVP with one model's result matrix."""
    gwas_res = pd.DataFrame(mvp.rx2(results_key), columns=['Effect', 'SE', pvalue_column])
    pos = pd.DataFrame(mvp.rx2('map'))
    # Give both frames the same row labels so the column-wise concat pairs
    # rows by position.
    pos.index = gwas_res.index
    return pd.concat([pos, gwas_res], axis=1)


def rmvp(model, cv_geno_prefix, geno_prefix, omics_phe, threads, out_path):
    """Run one rMVP association scan and write the result table as CSV.

    Any rMVP intermediate files that do not exist yet are generated first:
    ``<cv_geno_prefix>.pc.desc`` for GLM/FarmCPU, ``<cv_geno_prefix>.kin.desc``
    for MLM, and ``<geno_prefix>.geno.desc`` for every model. R console output
    is diverted to ``/dev/null`` for the duration of the call and restored
    afterwards, whether or not the scan succeeds.

    Parameters
    ----------
    model : str
        'GLM', 'FarmCPU' or 'MLM'.
    cv_geno_prefix : str
        File prefix handed to ``MVP_Data`` as ``fileBed`` to derive the
        principal components (GLM/FarmCPU) or the kinship matrix (MLM).
    geno_prefix : str
        File prefix of the genotypes that are scanned.
    omics_phe : object with ``.columns``
        Phenotype table passed to ``MVP`` as ``phe``. The name of its second
        column labels the p-value column and the output file. Presumably a
        pandas DataFrame with sample IDs in the first column -- confirm
        against the caller.
    threads : int
        Passed to ``MVP`` as ``ncpus``.
    out_path : str
        Directory in which ``<trait>.<model>.csv`` is written.

    Returns
    -------
    int
        0 on success; 1 if ``model`` is not supported or if an
        ``RRuntimeError`` or ``ValueError`` is raised while running.
    """
    result_keys = {'GLM': 'glm.results', 'FarmCPU': 'farmcpu.results', 'MLM': 'mlm.results'}
    if model not in result_keys:
        # Previously an unknown model ran the genotype conversion and then
        # failed with an UnboundLocalError; report it as a failure instead.
        return 1
    uses_pcs = model in ('GLM', 'FarmCPU')
    sink_active = False
    try:
        base.sink('/dev/null')
        sink_active = True
        if uses_pcs and not os.path.exists(cv_geno_prefix + '.pc.desc'):
            rMVP.MVP_Data(fileBed=cv_geno_prefix, fileKin=False, filePC=False, out=cv_geno_prefix,
                          verbose=False)
            rMVP.MVP_Data_PC(True, mvp_prefix=cv_geno_prefix, pcs_keep=10, verbose=False)
        if model == 'MLM' and not os.path.exists(cv_geno_prefix + '.kin.desc'):
            rMVP.MVP_Data(fileBed=cv_geno_prefix, fileKin=False, filePC=False, out=cv_geno_prefix,
                          verbose=False)
            rMVP.MVP_Data_Kin(True, mvp_prefix=cv_geno_prefix, verbose=False)
        if not os.path.exists(geno_prefix + '.geno.desc'):
            rMVP.MVP_Data(fileBed=geno_prefix, fileKin=False, filePC=False, out=geno_prefix, verbose=False)
        geno = bigmemory.attach_big_matrix(geno_prefix + '.geno.desc')
        map_file = pd.read_csv(geno_prefix + '.geno.map', sep='\t')
        if model == 'GLM':
            Covariates_PC = bigmemory.as_matrix(bigmemory.attach_big_matrix(cv_geno_prefix + '.pc.desc'))
            mvp = rMVP.MVP(phe=omics_phe, geno=geno, map=map_file, CV_GLM=Covariates_PC, priority='speed',
                           nPC_GLM=5, ncpus=threads, maxLoop=10, threshold=0.05, method=['GLM'],
                           file_output=False, verbose=False)
        elif model == 'FarmCPU':
            Covariates_PC = bigmemory.as_matrix(bigmemory.attach_big_matrix(cv_geno_prefix + '.pc.desc'))
            # NOTE(review): verbose=True differs from the other two models;
            # kept as-is, the output is sunk to /dev/null anyway -- confirm intent.
            mvp = rMVP.MVP(phe=omics_phe, geno=geno, map=map_file, CV_FarmCPU=Covariates_PC,
                           priority='speed', nPC_FarmCPU=3, ncpus=threads, maxLoop=10, threshold=0.05,
                           method=['FarmCPU'], file_output=False, method_bin='static', verbose=True)
        else:  # 'MLM'
            Kinship = bigmemory.attach_big_matrix(cv_geno_prefix + '.kin.desc')
            mvp = rMVP.MVP(phe=omics_phe, geno=geno, map=map_file, K=Kinship, priority='speed',
                           vc_method='BRENT', ncpus=threads, maxLoop=10, threshold=0.05, method=['MLM'],
                           file_output=False, verbose=False)
        trait = str(omics_phe.columns[1])
        res = _mvp_result_frame(mvp, result_keys[model], trait + '.' + model)
        res.to_csv(out_path.rstrip('/') + '/' + trait + '.' + model + '.csv', index=False)
    except (RRuntimeError, ValueError):
        return 1
    else:
        return 0
    finally:
        # Always undo the diversion; otherwise a failed run would leave R
        # output redirected for the rest of the process.
        if sink_active:
            base.sink()
123
124
125
def gapit(model, geno, omics_phe, gapit_path):
    """Run GAPIT on a PLINK bed file through an R wrapper function.

    The GAPIT sources ``GAPIT.library.R`` and ``gapit_functions.txt`` are
    loaded from ``gapit_path``. An R function ``gapit`` is then defined which
    reads ``<geno>.bed`` with bigsnpr, imputes missing genotypes with the
    'mode' method, builds the ``GD``/``GM`` tables and calls ``GAPIT``. R
    console output is diverted to ``/dev/null`` for the duration of the call
    and restored afterwards, whether or not the run succeeds.

    Parameters
    ----------
    model : str
        Passed to ``GAPIT`` as ``model``.
    geno : str
        Genotype file prefix; ``.bed`` is appended inside R.
    omics_phe : object
        Phenotype table passed to ``GAPIT`` as ``Y``.
    gapit_path : str
        Directory containing the two GAPIT source files.

    Returns
    -------
    int
        0 on success; 1 if an ``RRuntimeError`` or ``ValueError`` is raised.
        The value returned by GAPIT itself is discarded.
    """
    sink_active = False
    try:
        base.sink('/dev/null')
        sink_active = True
        gapit_dir = gapit_path.rstrip('/')
        robjects.r('source("' + gapit_dir + '/GAPIT.library.R")')
        robjects.r('source("' + gapit_dir + '/gapit_functions.txt")')
        robjects.r('''gapit <- function(geno,omics_phe,model){
            library(bigsnpr)
            g <- snp_readBed(paste(geno,'.bed',sep=''), backingfile=tempfile())
            g <- snp_attach(g)
            GD <- cbind(g$fam$family.ID,as.data.frame(snp_fastImputeSimple(g$genotypes, method='mode')[]))
            names(GD) <- c('Taxa',g$map$marker.ID)
            GM <- g$map[c('marker.ID','chromosome','physical.pos')]
            names(GM) <- c('Name','Chromosome','Position')
            GAPIT(Y=omics_phe, GD=GD, GM=GM, model=model, Major.allele.zero = T, SNP.MAF=0.05)
        }''')
        run_gapit = robjects.r('gapit')
        run_gapit(geno, omics_phe, model)
    except (RRuntimeError, ValueError):
        return 1
    else:
        return 0
    finally:
        # Always undo the diversion; otherwise a failed run would leave R
        # output redirected for the rest of the process.
        if sink_active:
            base.sink()