|
a |
|
b/modas/gwas_cmd.py |
|
|
1 |
import pandas as pd |
|
|
2 |
import numpy as np |
|
|
3 |
from sklearn.decomposition import PCA |
|
|
4 |
from rpy2.robjects import pandas2ri |
|
|
5 |
from rpy2.rinterface_lib.embedded import RRuntimeError |
|
|
6 |
import rpy2.robjects as robjects |
|
|
7 |
from rpy2.robjects.packages import importr |
|
|
8 |
from rpy2.rinterface_lib.callbacks import logger as rpy2_logger |
|
|
9 |
import subprocess |
|
|
10 |
import logging |
|
|
11 |
import glob, os |
|
|
12 |
import shutil |
|
|
13 |
import re |
|
|
14 |
|
|
|
15 |
# Enable rpy2's pandas conversion layer for the R calls made in this module.
pandas2ri.activate()

# Keep rpy2's callback logger quiet: only messages at ERROR level or above pass.
rpy2_logger.setLevel(logging.ERROR)

# Handles to the R packages used by the functions below, loaded once at import
# time.  Each name is bound to the package listed at the same position.
rMVP, base, data_table, bigmemory = (
    importr(r_package) for r_package in ('rMVP', 'base', 'data.table', 'bigmemory')
)
|
|
21 |
|
|
|
22 |
|
|
|
23 |
# Directory holding the external command-line tools this module shells out to
# (gemma_cmd() builds its command as utils_path + '/gemma ...').
#
# `locate modas/utils` prints every indexed path containing that fragment; the
# first line that lies under a site-packages directory is used.
utils_path = subprocess.check_output('locate modas/utils', shell=True, text=True, encoding='utf-8')

# Match complete lines with re.MULTILINE.  Anchoring on a literal '\n' on both
# sides could never match the first line of the listing, and a failed search
# surfaced as an AttributeError on None instead of a readable message.
_utils_hit = re.search(r'^(.*site-packages.*)$', utils_path, flags=re.MULTILINE)
if _utils_hit is None:
    raise RuntimeError(
        "no path under site-packages was found in the output of 'locate modas/utils'"
    )
utils_path = _utils_hit.group(1)

# The hit may be an entry inside the directory rather than the directory
# itself; in that case drop the last path component.
if not utils_path.endswith('utils'):
    utils_path = '/'.join(utils_path.split('/')[:-1])
|
|
28 |
|
|
|
29 |
|
|
|
30 |
def gemma_cmd(model, geno_prefix, kin_prefix, n, out_prefix):
    """Build the GEMMA command line for one association run.

    The command is only assembled as a string here; nothing is executed.

    Args:
        model: 'LM' for the linear model (``-lm``) or 'MLM' for the linear
            mixed model (``-lmm``).
        geno_prefix: value passed to ``-bfile``.
        kin_prefix: name used to locate the kinship matrix at
            ``./output/<kin_prefix>.cXX.txt`` (only used for 'MLM').
        n: value passed to ``-n`` (only used for 'MLM').
        out_prefix: value passed to ``-o``.

    Returns:
        The command string, starting with the gemma executable under the
        module-level ``utils_path``; ``None`` for any other ``model``.
    """
    if model == 'LM':
        arguments = '/gemma -bfile {0} -lm -o {1}'.format(geno_prefix, out_prefix)
    elif model == 'MLM':
        arguments = '/gemma -bfile {0} -k ./output/{1}.cXX.txt -lmm -n {2} -o {3}'.format(
            geno_prefix, kin_prefix, n, out_prefix)
    else:
        # Unrecognised model: no command is produced.
        return None
    return utils_path + arguments
|
|
35 |
|
|
|
36 |
|
|
|
37 |
def rmvp(model, cv_geno_prefix, geno_prefix, omics_phe, threads, out_path):
    """Run one rMVP association scan and write the result table as CSV.

    Missing rMVP intermediate files are generated on demand and reused on
    later calls: ``<cv_geno_prefix>.pc.desc`` (GLM / FarmCPU),
    ``<cv_geno_prefix>.kin.desc`` (MLM) and ``<geno_prefix>.geno.desc``.
    R console output is diverted to /dev/null for the duration of the call.

    Args:
        model: 'GLM', 'FarmCPU' or 'MLM'.
        cv_geno_prefix: prefix of the bed file set (passed as ``fileBed``)
            from which the principal components or the kinship are derived.
        geno_prefix: prefix of the bed file set whose markers are tested.
        omics_phe: pandas DataFrame handed to ``MVP`` as ``phe``.  The name of
            its second column is used as the trait name in the output.
            NOTE(review): presumably the first column holds sample IDs -
            confirm against the caller.
        threads: passed to ``MVP`` as ``ncpus``.
        out_path: directory that receives ``<trait>.<model>.csv``.

    Returns:
        0 on success, 1 if an ``RRuntimeError`` or ``ValueError`` occurred.

    Raises:
        ValueError: if ``model`` is not one of the supported names (raised
            before any R work is started).
    """
    # Name of the element of the MVP() return value holding each model's results.
    result_keys = {'GLM': 'glm.results', 'FarmCPU': 'farmcpu.results', 'MLM': 'mlm.results'}
    if model not in result_keys:
        raise ValueError('unsupported model: {!r}'.format(model))

    try:
        base.sink('/dev/null')
        try:
            if model == 'MLM':
                if not os.path.exists(cv_geno_prefix + '.kin.desc'):
                    rMVP.MVP_Data(fileBed=cv_geno_prefix, fileKin=False, filePC=False,
                                  out=cv_geno_prefix, verbose=False)
                    rMVP.MVP_Data_Kin(True, mvp_prefix=cv_geno_prefix, verbose=False)
            else:
                if not os.path.exists(cv_geno_prefix + '.pc.desc'):
                    rMVP.MVP_Data(fileBed=cv_geno_prefix, fileKin=False, filePC=False,
                                  out=cv_geno_prefix, verbose=False)
                    rMVP.MVP_Data_PC(True, mvp_prefix=cv_geno_prefix, pcs_keep=10, verbose=False)

            if not os.path.exists(geno_prefix + '.geno.desc'):
                rMVP.MVP_Data(fileBed=geno_prefix, fileKin=False, filePC=False,
                              out=geno_prefix, verbose=False)
            geno = bigmemory.attach_big_matrix(geno_prefix + '.geno.desc')
            map_file = pd.read_csv(geno_prefix + '.geno.map', sep='\t')

            # Arguments shared by all three models.
            mvp_args = dict(phe=omics_phe, geno=geno, map=map_file, priority='speed',
                            ncpus=threads, maxLoop=10, threshold=0.05, method=[model],
                            file_output=False)
            if model == 'MLM':
                kinship = bigmemory.attach_big_matrix(cv_geno_prefix + '.kin.desc')
                mvp_args.update(K=kinship, vc_method='BRENT', verbose=False)
            else:
                covariates_pc = bigmemory.as_matrix(
                    bigmemory.attach_big_matrix(cv_geno_prefix + '.pc.desc'))
                if model == 'GLM':
                    mvp_args.update(CV_GLM=covariates_pc, nPC_GLM=5, verbose=False)
                else:
                    # NOTE(review): FarmCPU is the only model run with
                    # verbose=True; kept unchanged - confirm it is intentional.
                    mvp_args.update(CV_FarmCPU=covariates_pc, nPC_FarmCPU=3,
                                    method_bin='static', verbose=True)
            mvp = rMVP.MVP(**mvp_args)

            trait = str(omics_phe.columns[1])
            gwas_res = pd.DataFrame(mvp.rx2(result_keys[model]),
                                    columns=['Effect', 'SE', trait + '.' + model])
            pos = pd.DataFrame(mvp.rx2('map'))
            # Give both frames the same index so concat pairs the rows up.
            pos.index = gwas_res.index
            res = pd.concat([pos, gwas_res], axis=1)
            res.to_csv(out_path.rstrip('/') + '/' + trait + '.' + model + '.csv', index=False)
        finally:
            # Remove the diversion on every exit path; otherwise each failed
            # run would leave one more entry on R's sink stack.
            base.sink()
    except (RRuntimeError, ValueError):
        return 1
    return 0
|
|
123 |
|
|
|
124 |
|
|
|
125 |
def gapit(model, geno, omics_phe, gapit_path):
    """Run GAPIT on a bed file set through an R wrapper function.

    The GAPIT sources are loaded from ``gapit_path`` and an R function named
    ``gapit`` is defined in the R session.  That function reads
    ``<geno>.bed`` with bigsnpr, fills missing genotypes with
    ``snp_fastImputeSimple(method='mode')``, builds the ``GD`` / ``GM`` tables
    and calls ``GAPIT`` with ``SNP.MAF=0.05``.  R console output is diverted
    to /dev/null for the duration of the call.  The value returned by GAPIT
    is not used here.

    Args:
        model: passed to ``GAPIT`` as ``model``.
        geno: prefix of the bed file set (``.bed`` is appended in R).
        omics_phe: phenotype table passed to ``GAPIT`` as ``Y``.
        gapit_path: directory containing ``GAPIT.library.R`` and
            ``gapit_functions.txt``.

    Returns:
        0 on success, 1 if an ``RRuntimeError`` or ``ValueError`` occurred.
    """
    try:
        base.sink('/dev/null')
        try:
            source_dir = gapit_path.rstrip('/')
            robjects.r('source("' + source_dir + '/GAPIT.library.R")')
            robjects.r('source("' + source_dir + '/gapit_functions.txt")')
            robjects.r('''gapit <- function(geno,omics_phe,model){
            library(bigsnpr)
            g <- snp_readBed(paste(geno,'.bed',sep=''), backingfile=tempfile())
            g <- snp_attach(g)
            GD <- cbind(g$fam$family.ID,as.data.frame(snp_fastImputeSimple(g$genotypes, method='mode')[]))
            names(GD) <- c('Taxa',g$map$marker.ID)
            GM <- g$map[c('marker.ID','chromosome','physical.pos')]
            names(GM) <- c('Name','Chromosome','Position')
            GAPIT(Y=omics_phe, GD=GD, GM=GM, model=model, Major.allele.zero = T, SNP.MAF=0.05)
            }''')
            run_gapit = robjects.r('gapit')
            run_gapit(geno, omics_phe, model)
        finally:
            # Remove the diversion on every exit path; otherwise each failed
            # run would leave one more entry on R's sink stack.
            base.sink()
    except (RRuntimeError, ValueError):
        return 1
    return 0