Diff of /bin/oddt_cli [000000] .. [3b722e]

Switch to side-by-side view

--- a
+++ b/bin/oddt_cli
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import sys
+
+# FIX Windows multiprocessing
+# Module multiprocessing is organized differently in Python 3.4+
+try:
+    # Python 3.4+
+    if sys.platform.startswith('win'):
+        import multiprocessing.popen_spawn_win32 as forking
+    else:
+        import multiprocessing.popen_fork as forking
+except ImportError:
+    import multiprocessing.forking as forking
+
+if sys.platform.startswith('win'):
+    # First define a modified version of Popen.
+    class _Popen(forking.Popen):
+        def __init__(self, *args, **kw):
+            if hasattr(sys, 'frozen'):
+                # We have to set original _MEIPASS2 value from sys._MEIPASS
+                # to get --onefile mode working.
+                os.putenv('_MEIPASS2', sys._MEIPASS)
+            try:
+                super(_Popen, self).__init__(*args, **kw)
+            finally:
+                if hasattr(sys, 'frozen'):
+                    # On some platforms (e.g. AIX) 'os.unsetenv()' is not
+                    # available. In those cases we cannot delete the variable
+                    # but only set it to the empty string. The bootloader
+                    # can handle this case.
+                    if hasattr(os, 'unsetenv'):
+                        os.unsetenv('_MEIPASS2')
+                    else:
+                        os.putenv('_MEIPASS2', '')
+
+    # Second override 'Popen' class with our modified version.
+    forking.Popen = _Popen
+# END Fix Windows multiprocessing
+import multiprocessing
+import six
+from os.path import isfile
+from ast import literal_eval
+import argparse
+
+import oddt
+from oddt.scoring import scorer
+
+
+def main():
+    # arguments
+    parser = argparse.ArgumentParser(
+        description='Open Drug Discovery (ODDT) command line tools')
+    parser.add_argument('--toolkit',
+                        dest='toolkit',
+                        choices=['ob', 'rdk'],
+                        default='ob',
+                        help=('Choose which toolkit should be used for '
+                              'calculations, either "ob" (OpenBabel) or '
+                              '"rdkit" (RDKit) (default: ob)'))
+    parser.add_argument('-n', '--n_cpu',
+                        dest='n_cpu',
+                        type=int,
+                        help=('The number of parallel processes. '
+                              '-1 automatically assigns maximum number of CPUs.'
+                              ' (default=-1)'))
+    parser.add_argument('--version',
+                        action='version',
+                        version='%(prog)s ' + oddt.__version__)
+
+    parser.add_argument('-c', '--chunksize',
+                        dest='chunksize',
+                        type=int,
+                        default=100,
+                        help=('The number of molecules to process in a chunk. '
+                              ' (default=100)'))
+
+    # in/out files and formats
+    parser.add_argument('in_file', nargs='+',
+                        help='Input files of formats supported by toolkit.')
+    parser.add_argument('-i', dest='in_format', help='Input file(s) format')
+    parser.add_argument('-o', dest='out_format', help='Output file format')
+    parser.add_argument('-O', '--output', dest='out_file', help='Output file')
+
+    # filter
+    group = parser.add_argument_group('Filtering')
+    group.add_argument('--filter',
+                       dest='filter',
+                       action='append',
+                       default=[],
+                       help=('Choose built-in filters to be used (eg. "ro5", '
+                             '"ro3", "pains")'))
+
+    # fingerprints
+    group = parser.add_argument_group('Similarity searching')
+    group.add_argument('--similarity',
+                       dest='similarity',
+                       action='append',
+                       default=[],
+                       choices=['ifp', 'sifp', 'usr', 'usr_cat', 'electroshape'],
+                       help='Choose similarity method to use (eg. "ifp", "sifp", '
+                       '"usr", "usr_cat", "electroshape")')
+
+    group.add_argument('--cutoff',
+                       dest='cutoff',
+                       type=float,
+                       default=0.9,
+                       help=('Similarity cufoff below which molecules will be'
+                             ' ignored.'))
+
+    group.add_argument('--query',
+                       dest='query',
+                       action='append',
+                       help='Query molecule(s) for similarity searching')
+
+    # docking
+    group = parser.add_argument_group('Protein-Ligand docking')
+    group.add_argument('--dock',
+                       dest='dock',
+                       choices=['autodock_vina'],
+                       help='Choose docking software to be used')
+    group.add_argument('--receptor', help='Protein file')
+    group.add_argument('--auto_ligand',
+                       help='Docking Box is determined on that ligand')
+    group.add_argument('--center', type=literal_eval,
+                       help='Docking Box center (x,y,z)')
+    group.add_argument('--size', type=literal_eval,
+                       help='Docking Box dimentions  (x,y,z)')
+    group.add_argument('--exhaustiveness', default=8, type=int,
+                       help='Exhaustiveness of docking')
+    group.add_argument('--seed', help='Random Seed')
+
+    # scoring
+
+    # generate scoring functions options
+    sf_choices = ['autodock_vina', 'rfscore', 'nnscore']
+    for v in [1, 2, 3]:
+        sf_choices.append('rfscore_v%i' % v)
+    for v in ['linear', 'nn', 'rf']:
+        sf_choices.append('plec%s' % v)
+
+    for pdbbind_version in [2007, 2012, 2013, 2014, 2015, 2016]:
+        for v in [1, 2, 3]:
+            sf_choices.append('rfscore_v%i_pdbbind%i' % (v, pdbbind_version))
+        sf_choices.append('nnscore_pdbbind%i' % (pdbbind_version))
+
+    # PLECscore is supported only for v2016+
+    for pdbbind_version in [2016]:
+        for v in ['linear', 'nn', 'rf']:
+            sf_choices.append('plec%s_pdbbind%i' % (v, pdbbind_version))
+
+    group = parser.add_argument_group('Rescoring')
+    group.add_argument('--score',
+                       dest='score',
+                       choices=sf_choices,
+                       action='append',
+                       default=[],
+                       help='Choose built-in scoring function to be used')
+    group.add_argument('--score_file',
+                       dest='score_file',
+                       action='append',
+                       default=[],
+                       help='Choose ODDT scoring function saved to file (pickle)')
+
+    parser.add_argument('--field',
+                        dest='save_fields',
+                        action='append',
+                        default=[],
+                        help=('Field to save (eg. in CSV). Each field should be'
+                              ' specified separately.'))
+
+    args = parser.parse_args()
+
+    # Switch toolkits
+    if 'toolkit' in args:
+        if (args.toolkit == 'rdk' or ('ODDT_TOOLKIT' in os.environ and
+                                      os.environ['ODDT_TOOLKIT'] == 'rdk')):
+            from oddt.toolkits import rdk
+            oddt.toolkit = rdk
+        else:  # OB as fallback
+            from oddt.toolkits import ob
+            oddt.toolkit = ob
+
+    from oddt.virtualscreening import virtualscreening as vs
+
+    # Create pipeline for docking and rescoring
+    pipeline = vs(n_cpu=args.n_cpu if 'n_cpu' in args else -1,
+                  chunksize=args.chunksize)
+    for f in args.in_file:
+        if args.in_format:
+            fmt = args.in_format
+        else:  # autodiscover
+            tmp = f.split('.')
+            if tmp[-1] == 'gz':
+                fmt = tmp[-2]
+            else:
+                fmt = tmp[-1]
+        if isfile(f):
+            pipeline.load_ligands(fmt, f)  # add loading ligands from STDIN?
+        else:
+            raise IOError("File does not exist: '%s'" % f)
+
+    # Filter ligands
+    for filter in args.filter:
+        pipeline.apply_filter(filter)
+
+    receptor = None  # Not all similarity methods require receptor/protein
+    # load protein once
+    if args.receptor:
+        extension = args.receptor.split('.')[-1]
+        receptor = six.next(oddt.toolkit.readfile(extension, args.receptor))
+        receptor.protein = True
+
+    if args.query:
+        query = [six.next(oddt.toolkit.readfile(q.split('.')[-1], q))
+                 for q in args.query]
+
+    for i, sim in enumerate(args.similarity):
+        pipeline.similarity(sim, query, protein=receptor, cutoff=args.cutoff)
+
+    # Docking
+    if args.dock == 'autodock_vina':
+        kwargs = {}
+        if args.center:
+            kwargs['center'] = args.center
+        if args.size:
+            kwargs['size'] = args.size
+        if args.size:
+            kwargs['size'] = args.size
+        if args.auto_ligand:
+            kwargs['auto_ligand'] = args.auto_ligand
+        if args.exhaustiveness:
+            kwargs['exhaustiveness'] = args.exhaustiveness
+        if args.seed:
+            kwargs['seed'] = args.seed
+        pipeline.dock('autodock_vina', receptor, **kwargs)
+
+    # Rescoring
+    for score in args.score:
+        for sf_name in ['nnscore', 'rfscore', 'plec', 'autodock_vina']:
+            if score.startswith(sf_name):
+                pipeline.score(score, receptor)
+
+    for score_file in args.score_file:
+        if isfile(score_file):  # load pickle
+            sf = scorer.load(score_file)
+            pipeline.score(sf, receptor)
+        else:
+            raise IOError('Could not read pickle file %s' % score_file)
+
+    # Write to file or STDOUT
+    if args.out_file:
+        if args.out_format:
+            fmt = args.out_format
+        else:  # autodiscover
+            tmp = args.out_file.split('.')
+            if tmp[-1] == 'gz':
+                fmt = tmp[-2]
+            else:
+                fmt = tmp[-1]
+        if not fmt:
+            raise ValueError('No output format nor output file specified.')
+        if fmt == 'csv':
+            pipeline.write_csv(args.out_file, fields=args.save_fields)
+        else:
+            pipeline.write(fmt, args.out_file)
+    else:
+        fmt = args.out_format
+        if not fmt:
+            raise ValueError('No output format nor output file specified.')
+        if fmt == 'csv':
+            pipeline.write_csv(sys.stdout, fields=args.save_fields)
+        else:
+            for lig in pipeline.fetch():
+                sys.stdout.write(lig.write(fmt))
+
+
+if __name__ == '__main__':
+    # freeze_support() is necessary on Windows and is called before main().
+    # On Linux/OSX it does nothing.
+    multiprocessing.freeze_support()
+    main()