|
a |
|
b/bin/oddt_cli |
|
|
1 |
#!/usr/bin/env python |
|
|
2 |
# -*- coding: utf-8 -*- |
|
|
3 |
import os |
|
|
4 |
import sys |
|
|
5 |
|
|
|
6 |
# FIX Windows multiprocessing
# The process-spawning backend lives in a different submodule depending on
# the Python version: 3.4+ splits it per start method, older versions keep
# everything in ``multiprocessing.forking``.
try:
    # Python 3.4+
    if sys.platform.startswith('win'):
        import multiprocessing.popen_spawn_win32 as forking
    else:
        import multiprocessing.popen_fork as forking
except ImportError:
    import multiprocessing.forking as forking

if sys.platform.startswith('win'):
    class _Popen(forking.Popen):
        """``Popen`` variant that exports ``_MEIPASS2`` around process start.

        When the interpreter is frozen (``sys.frozen`` is set), the
        ``_MEIPASS2`` environment variable is set to ``sys._MEIPASS`` for the
        duration of the parent constructor so that --onefile mode works, and
        is cleared again afterwards.
        """

        def __init__(self, *args, **kw):
            # Not frozen: nothing to patch, behave exactly like the parent.
            if not hasattr(sys, 'frozen'):
                super(_Popen, self).__init__(*args, **kw)
                return

            # We have to set original _MEIPASS2 value from sys._MEIPASS
            # to get --onefile mode working.
            os.putenv('_MEIPASS2', sys._MEIPASS)
            try:
                super(_Popen, self).__init__(*args, **kw)
            finally:
                # On some platforms (e.g. AIX) 'os.unsetenv()' is not
                # available. In those cases we cannot delete the variable
                # but only set it to the empty string. The bootloader
                # can handle this case.
                if hasattr(os, 'unsetenv'):
                    os.unsetenv('_MEIPASS2')
                else:
                    os.putenv('_MEIPASS2', '')

    # Replace the stock 'Popen' with the patched version above.
    forking.Popen = _Popen
# END Fix Windows multiprocessing
|
|
41 |
import multiprocessing |
|
|
42 |
import six |
|
|
43 |
from os.path import isfile |
|
|
44 |
from ast import literal_eval |
|
|
45 |
import argparse |
|
|
46 |
|
|
|
47 |
import oddt |
|
|
48 |
from oddt.scoring import scorer |
|
|
49 |
|
|
|
50 |
|
|
|
51 |
def _scoring_function_choices():
    """Return the names accepted by ``--score``, in help-display order."""
    choices = ['autodock_vina', 'rfscore', 'nnscore']
    choices.extend('rfscore_v%i' % v for v in (1, 2, 3))
    choices.extend('plec%s' % v for v in ('linear', 'nn', 'rf'))

    for pdbbind_version in (2007, 2012, 2013, 2014, 2015, 2016):
        choices.extend('rfscore_v%i_pdbbind%i' % (v, pdbbind_version)
                       for v in (1, 2, 3))
        choices.append('nnscore_pdbbind%i' % pdbbind_version)

    # PLECscore is supported only for v2016+
    for pdbbind_version in (2016,):
        choices.extend('plec%s_pdbbind%i' % (v, pdbbind_version)
                       for v in ('linear', 'nn', 'rf'))
    return choices


def _guess_format(path):
    """Guess a molecule file format from the extension of ``path``.

    A trailing ``.gz`` is skipped so that ``ligands.sdf.gz`` yields ``sdf``.
    A name without any dot is returned unchanged.
    """
    parts = path.split('.')
    if parts[-1] == 'gz' and len(parts) > 1:
        return parts[-2]
    return parts[-1]


def _build_parser():
    """Create the argument parser describing the whole command line."""
    parser = argparse.ArgumentParser(
        description='Open Drug Discovery (ODDT) command line tools')
    parser.add_argument('--toolkit',
                        dest='toolkit',
                        choices=['ob', 'rdk'],
                        default='ob',
                        help=('Choose which toolkit should be used for '
                              'calculations, either "ob" (OpenBabel) or '
                              '"rdk" (RDKit) (default: ob)'))
    parser.add_argument('-n', '--n_cpu',
                        dest='n_cpu',
                        type=int,
                        # Make the documented default real; without it
                        # argparse stores None.
                        default=-1,
                        help=('The number of parallel processes. '
                              '-1 automatically assigns maximum number of CPUs.'
                              ' (default=-1)'))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + oddt.__version__)

    parser.add_argument('-c', '--chunksize',
                        dest='chunksize',
                        type=int,
                        default=100,
                        help=('The number of molecules to process in a chunk. '
                              ' (default=100)'))

    # in/out files and formats
    parser.add_argument('in_file', nargs='+',
                        help='Input files of formats supported by toolkit.')
    parser.add_argument('-i', dest='in_format', help='Input file(s) format')
    parser.add_argument('-o', dest='out_format', help='Output file format')
    parser.add_argument('-O', '--output', dest='out_file', help='Output file')

    # filter
    group = parser.add_argument_group('Filtering')
    group.add_argument('--filter',
                       dest='filter',
                       action='append',
                       default=[],
                       help=('Choose built-in filters to be used (eg. "ro5", '
                             '"ro3", "pains")'))

    # fingerprints
    group = parser.add_argument_group('Similarity searching')
    group.add_argument('--similarity',
                       dest='similarity',
                       action='append',
                       default=[],
                       choices=['ifp', 'sifp', 'usr', 'usr_cat', 'electroshape'],
                       help='Choose similarity method to use (eg. "ifp", "sifp", '
                            '"usr", "usr_cat", "electroshape")')

    group.add_argument('--cutoff',
                       dest='cutoff',
                       type=float,
                       default=0.9,
                       help=('Similarity cufoff below which molecules will be'
                             ' ignored.'))

    group.add_argument('--query',
                       dest='query',
                       action='append',
                       help='Query molecule(s) for similarity searching')

    # docking
    group = parser.add_argument_group('Protein-Ligand docking')
    group.add_argument('--dock',
                       dest='dock',
                       choices=['autodock_vina'],
                       help='Choose docking software to be used')
    group.add_argument('--receptor', help='Protein file')
    group.add_argument('--auto_ligand',
                       help='Docking Box is determined on that ligand')
    group.add_argument('--center', type=literal_eval,
                       help='Docking Box center (x,y,z)')
    group.add_argument('--size', type=literal_eval,
                       help='Docking Box dimentions (x,y,z)')
    group.add_argument('--exhaustiveness', default=8, type=int,
                       help='Exhaustiveness of docking')
    group.add_argument('--seed', help='Random Seed')

    # scoring
    group = parser.add_argument_group('Rescoring')
    group.add_argument('--score',
                       dest='score',
                       choices=_scoring_function_choices(),
                       action='append',
                       default=[],
                       help='Choose built-in scoring function to be used')
    group.add_argument('--score_file',
                       dest='score_file',
                       action='append',
                       default=[],
                       help='Choose ODDT scoring function saved to file (pickle)')

    parser.add_argument('--field',
                        dest='save_fields',
                        action='append',
                        default=[],
                        help=('Field to save (eg. in CSV). Each field should be'
                              ' specified separately.'))
    return parser


def main():
    """Run the ODDT command line tool.

    Parses ``sys.argv``, selects the toolkit, builds a virtual-screening
    pipeline (load ligands, filter, similarity search, docking, rescoring)
    and writes the result to the output file or to STDOUT.

    Raises:
        IOError: if an input ligand file or a ``--score_file`` does not exist.
        ValueError: if no output format can be determined.
        SystemExit: on invalid command line arguments (raised by argparse).
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Similarity searching needs something to compare against; fail early
    # with a usage error instead of a NameError further down.
    if args.similarity and not args.query:
        parser.error('--similarity requires at least one --query molecule')

    # Switch toolkits. The toolkit has to be chosen before the
    # virtualscreening module is imported below.
    if args.toolkit == 'rdk' or os.environ.get('ODDT_TOOLKIT') == 'rdk':
        from oddt.toolkits import rdk
        oddt.toolkit = rdk
    else:  # OB as fallback
        from oddt.toolkits import ob
        oddt.toolkit = ob

    from oddt.virtualscreening import virtualscreening as vs

    # Create pipeline for docking and rescoring
    pipeline = vs(n_cpu=args.n_cpu, chunksize=args.chunksize)
    for in_path in args.in_file:
        fmt = args.in_format or _guess_format(in_path)  # autodiscover
        if not isfile(in_path):
            raise IOError("File does not exist: '%s'" % in_path)
        pipeline.load_ligands(fmt, in_path)  # add loading ligands from STDIN?

    # Filter ligands
    for filter_name in args.filter:
        pipeline.apply_filter(filter_name)

    receptor = None  # Not all similarity methods require receptor/protein
    # load protein once
    if args.receptor:
        extension = args.receptor.split('.')[-1]
        receptor = six.next(oddt.toolkit.readfile(extension, args.receptor))
        receptor.protein = True

    # Only the first molecule of every query file is used.
    query = [six.next(oddt.toolkit.readfile(q.split('.')[-1], q))
             for q in (args.query or [])]

    for sim in args.similarity:
        pipeline.similarity(sim, query, protein=receptor, cutoff=args.cutoff)

    # Docking
    if args.dock == 'autodock_vina':
        # Forward only the options that were actually provided.
        dock_kwargs = {}
        for option in ('center', 'size', 'auto_ligand', 'exhaustiveness',
                       'seed'):
            value = getattr(args, option)
            if value:
                dock_kwargs[option] = value
        pipeline.dock('autodock_vina', receptor, **dock_kwargs)

    # Rescoring
    builtin_prefixes = ('nnscore', 'rfscore', 'plec', 'autodock_vina')
    for score in args.score:
        if score.startswith(builtin_prefixes):
            pipeline.score(score, receptor)

    for score_file in args.score_file:
        if not isfile(score_file):
            raise IOError('Could not read pickle file %s' % score_file)
        # NOTE(review): per the --score_file help this is a pickle; only
        # load files from trusted sources.
        sf = scorer.load(score_file)
        pipeline.score(sf, receptor)

    # Write to file or STDOUT
    if args.out_file:
        fmt = args.out_format or _guess_format(args.out_file)  # autodiscover
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(args.out_file, fields=args.save_fields)
        else:
            pipeline.write(fmt, args.out_file)
    else:
        fmt = args.out_format
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(sys.stdout, fields=args.save_fields)
        else:
            for lig in pipeline.fetch():
                sys.stdout.write(lig.write(fmt))
|
|
277 |
|
|
|
278 |
|
|
|
279 |
if __name__ == '__main__':
    # Must be called before anything else when running on Windows;
    # on Linux/OSX the call has no effect.
    multiprocessing.freeze_support()
    main()