Diff of /bin/oddt_cli [000000] .. [3b722e]

Switch to unified view

a b/bin/oddt_cli
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
import os
4
import sys
5
6
# FIX Windows multiprocessing
# Module multiprocessing is organized differently in Python 3.4+
try:
    # Python 3.4+: the Popen implementation moved out of the old
    # 'multiprocessing.forking' module into per-start-method submodules,
    # so pick the platform-appropriate one and alias it as 'forking'.
    if sys.platform.startswith('win'):
        import multiprocessing.popen_spawn_win32 as forking
    else:
        import multiprocessing.popen_fork as forking
except ImportError:
    # Python 2 / <3.4 fallback: the monolithic 'forking' module still exists.
    import multiprocessing.forking as forking
16
17
if sys.platform.startswith('win'):
    # PyInstaller --onefile workaround: child processes spawned by
    # multiprocessing on Windows must be told where the unpacked bundle
    # lives via the _MEIPASS2 environment variable.
    # First define a modified version of Popen.
    class _Popen(forking.Popen):
        def __init__(self, *args, **kw):
            if hasattr(sys, 'frozen'):
                # We have to set original _MEIPASS2 value from sys._MEIPASS
                # to get --onefile mode working.
                # NOTE(review): os.putenv() changes the process environment
                # without updating os.environ — presumably intentional so the
                # variable is visible only to the spawned child; confirm
                # against the PyInstaller recipe this was copied from.
                os.putenv('_MEIPASS2', sys._MEIPASS)
            try:
                super(_Popen, self).__init__(*args, **kw)
            finally:
                # Always scrub _MEIPASS2 again so it does not leak into
                # unrelated subprocesses, even if __init__ raised.
                if hasattr(sys, 'frozen'):
                    # On some platforms (e.g. AIX) 'os.unsetenv()' is not
                    # available. In those cases we cannot delete the variable
                    # but only set it to the empty string. The bootloader
                    # can handle this case.
                    if hasattr(os, 'unsetenv'):
                        os.unsetenv('_MEIPASS2')
                    else:
                        os.putenv('_MEIPASS2', '')

    # Second override 'Popen' class with our modified version.
    forking.Popen = _Popen
# END Fix Windows multiprocessing
41
import multiprocessing
42
import six
43
from os.path import isfile
44
from ast import literal_eval
45
import argparse
46
47
import oddt
48
from oddt.scoring import scorer
49
50
51
def _guess_format(path):
    """Return the molecule file format implied by *path*'s extension.

    A trailing ``.gz`` is transparent: ``'ligands.sdf.gz'`` yields
    ``'sdf'``, while ``'ligands.sdf'`` yields ``'sdf'``.
    """
    parts = path.split('.')
    return parts[-2] if parts[-1] == 'gz' else parts[-1]


def main():
    """Entry point for the ODDT command line interface.

    Parses arguments, selects the chemistry toolkit, then assembles and
    runs a virtual-screening pipeline: load ligands -> filter ->
    similarity search -> dock -> (re)score -> write results to a file
    or STDOUT.

    Raises:
        IOError: when an input ligand file or scoring-function pickle
            does not exist.
        ValueError: when neither an output format nor an output file
            with a recognizable extension was given.
    """
    # arguments
    parser = argparse.ArgumentParser(
        description='Open Drug Discovery (ODDT) command line tools')
    parser.add_argument('--toolkit',
                        dest='toolkit',
                        choices=['ob', 'rdk'],
                        default='ob',
                        help=('Choose which toolkit should be used for '
                              'calculations, either "ob" (OpenBabel) or '
                              '"rdk" (RDKit) (default: ob)'))
    parser.add_argument('-n', '--n_cpu',
                        dest='n_cpu',
                        type=int,
                        help=('The number of parallel processes. '
                              '-1 automatically assigns maximum number of CPUs.'
                              ' (default=-1)'))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + oddt.__version__)

    parser.add_argument('-c', '--chunksize',
                        dest='chunksize',
                        type=int,
                        default=100,
                        help=('The number of molecules to process in a chunk. '
                              ' (default=100)'))

    # in/out files and formats
    parser.add_argument('in_file', nargs='+',
                        help='Input files of formats supported by toolkit.')
    parser.add_argument('-i', dest='in_format', help='Input file(s) format')
    parser.add_argument('-o', dest='out_format', help='Output file format')
    parser.add_argument('-O', '--output', dest='out_file', help='Output file')

    # filter
    group = parser.add_argument_group('Filtering')
    group.add_argument('--filter',
                       dest='filter',
                       action='append',
                       default=[],
                       help=('Choose built-in filters to be used (eg. "ro5", '
                             '"ro3", "pains")'))

    # fingerprints
    group = parser.add_argument_group('Similarity searching')
    group.add_argument('--similarity',
                       dest='similarity',
                       action='append',
                       default=[],
                       choices=['ifp', 'sifp', 'usr', 'usr_cat', 'electroshape'],
                       help='Choose similarity method to use (eg. "ifp", "sifp", '
                       '"usr", "usr_cat", "electroshape")')

    group.add_argument('--cutoff',
                       dest='cutoff',
                       type=float,
                       default=0.9,
                       help=('Similarity cutoff below which molecules will be'
                             ' ignored.'))

    group.add_argument('--query',
                       dest='query',
                       action='append',
                       help='Query molecule(s) for similarity searching')

    # docking
    group = parser.add_argument_group('Protein-Ligand docking')
    group.add_argument('--dock',
                       dest='dock',
                       choices=['autodock_vina'],
                       help='Choose docking software to be used')
    group.add_argument('--receptor', help='Protein file')
    group.add_argument('--auto_ligand',
                       help='Docking Box is determined on that ligand')
    group.add_argument('--center', type=literal_eval,
                       help='Docking Box center (x,y,z)')
    group.add_argument('--size', type=literal_eval,
                       help='Docking Box dimensions (x,y,z)')
    group.add_argument('--exhaustiveness', default=8, type=int,
                       help='Exhaustiveness of docking')
    group.add_argument('--seed', help='Random Seed')

    # scoring

    # generate scoring functions options
    sf_choices = ['autodock_vina', 'rfscore', 'nnscore']
    for v in [1, 2, 3]:
        sf_choices.append('rfscore_v%i' % v)
    for v in ['linear', 'nn', 'rf']:
        sf_choices.append('plec%s' % v)

    for pdbbind_version in [2007, 2012, 2013, 2014, 2015, 2016]:
        for v in [1, 2, 3]:
            sf_choices.append('rfscore_v%i_pdbbind%i' % (v, pdbbind_version))
        sf_choices.append('nnscore_pdbbind%i' % (pdbbind_version))

    # PLECscore is supported only for v2016+
    for pdbbind_version in [2016]:
        for v in ['linear', 'nn', 'rf']:
            sf_choices.append('plec%s_pdbbind%i' % (v, pdbbind_version))

    group = parser.add_argument_group('Rescoring')
    group.add_argument('--score',
                       dest='score',
                       choices=sf_choices,
                       action='append',
                       default=[],
                       help='Choose built-in scoring function to be used')
    group.add_argument('--score_file',
                       dest='score_file',
                       action='append',
                       default=[],
                       help='Choose ODDT scoring function saved to file (pickle)')

    parser.add_argument('--field',
                        dest='save_fields',
                        action='append',
                        default=[],
                        help=('Field to save (eg. in CSV). Each field should be'
                              ' specified separately.'))

    args = parser.parse_args()

    # Similarity searching needs at least one query molecule; fail early
    # with a clear message instead of a NameError further down.
    if args.similarity and not args.query:
        parser.error('--similarity requires at least one --query molecule')

    # Switch toolkits. The ODDT_TOOLKIT environment variable may also force
    # RDKit; OpenBabel is the fallback.  ('--toolkit' has a default, so the
    # attribute is always present — no membership test needed.)
    if args.toolkit == 'rdk' or os.environ.get('ODDT_TOOLKIT') == 'rdk':
        from oddt.toolkits import rdk
        oddt.toolkit = rdk
    else:  # OB as fallback
        from oddt.toolkits import ob
        oddt.toolkit = ob

    from oddt.virtualscreening import virtualscreening as vs

    # Create pipeline for docking and rescoring.  argparse always sets the
    # 'n_cpu' attribute (None when the flag is absent), so test the value —
    # a membership test is always true and would pass None through.
    pipeline = vs(n_cpu=args.n_cpu if args.n_cpu is not None else -1,
                  chunksize=args.chunksize)
    for f in args.in_file:
        fmt = args.in_format if args.in_format else _guess_format(f)
        if isfile(f):
            pipeline.load_ligands(fmt, f)  # add loading ligands from STDIN?
        else:
            raise IOError("File does not exist: '%s'" % f)

    # Filter ligands
    for filter_name in args.filter:
        pipeline.apply_filter(filter_name)

    receptor = None  # Not all similarity methods require receptor/protein
    # load protein once
    if args.receptor:
        extension = args.receptor.split('.')[-1]
        receptor = six.next(oddt.toolkit.readfile(extension, args.receptor))
        receptor.protein = True

    query = None
    if args.query:
        query = [six.next(oddt.toolkit.readfile(q.split('.')[-1], q))
                 for q in args.query]

    for sim in args.similarity:
        pipeline.similarity(sim, query, protein=receptor, cutoff=args.cutoff)

    # Docking
    if args.dock == 'autodock_vina':
        kwargs = {}
        if args.center:
            kwargs['center'] = args.center
        if args.size:
            kwargs['size'] = args.size
        if args.auto_ligand:
            kwargs['auto_ligand'] = args.auto_ligand
        if args.exhaustiveness:
            kwargs['exhaustiveness'] = args.exhaustiveness
        if args.seed:
            kwargs['seed'] = args.seed
        pipeline.dock('autodock_vina', receptor, **kwargs)

    # Rescoring: the sf_choices prefixes are mutually exclusive, so a
    # single tuple-startswith test is equivalent to the per-prefix loop.
    for score in args.score:
        if score.startswith(('nnscore', 'rfscore', 'plec', 'autodock_vina')):
            pipeline.score(score, receptor)

    for score_file in args.score_file:
        if isfile(score_file):  # load pickle
            sf = scorer.load(score_file)
            pipeline.score(sf, receptor)
        else:
            raise IOError('Could not read pickle file %s' % score_file)

    # Write to file or STDOUT
    if args.out_file:
        fmt = args.out_format if args.out_format else _guess_format(args.out_file)
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(args.out_file, fields=args.save_fields)
        else:
            pipeline.write(fmt, args.out_file)
    else:
        fmt = args.out_format
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(sys.stdout, fields=args.save_fields)
        else:
            for lig in pipeline.fetch():
                sys.stdout.write(lig.write(fmt))
277
278
279
if __name__ == '__main__':
    # On Windows calling this function is necessary to support
    # multiprocessing in frozen (e.g. PyInstaller) executables;
    # it must run before any Process is started.
    # On Linux/OSX it does nothing.
    multiprocessing.freeze_support()
    main()
    main()