#!/usr/bin/env python
# -*- coding: utf-8 -*-
# oddt_cli -- Open Drug Discovery Toolkit (ODDT) command line tool.
import os
import sys

# FIX Windows multiprocessing
# Module multiprocessing is organized differently in Python 3.4+:
# the Popen process-start implementations moved into per-platform
# submodules (popen_spawn_win32 / popen_fork).
try:
    # Python 3.4+
    if sys.platform.startswith('win'):
        # Windows always starts children via spawn.
        import multiprocessing.popen_spawn_win32 as forking
    else:
        # POSIX default start method is fork.
        import multiprocessing.popen_fork as forking
except ImportError:
    # Older Pythons kept everything in multiprocessing.forking.
    import multiprocessing.forking as forking

if sys.platform.startswith('win'):
    # First define a modified version of Popen.
    # NOTE(review): this is the standard workaround for frozen one-file
    # Windows executables (sys.frozen / sys._MEIPASS are set by freezers
    # such as PyInstaller) — child processes need _MEIPASS2 to locate the
    # unpacked bundle.
    class _Popen(forking.Popen):
        def __init__(self, *args, **kw):
            if hasattr(sys, 'frozen'):
                # We have to set original _MEIPASS2 value from sys._MEIPASS
                # to get --onefile mode working.
                os.putenv('_MEIPASS2', sys._MEIPASS)
            try:
                super(_Popen, self).__init__(*args, **kw)
            finally:
                # Always restore the environment, even if spawning failed.
                if hasattr(sys, 'frozen'):
                    # On some platforms (e.g. AIX) 'os.unsetenv()' is not
                    # available. In those cases we cannot delete the variable
                    # but only set it to the empty string. The bootloader
                    # can handle this case.
                    if hasattr(os, 'unsetenv'):
                        os.unsetenv('_MEIPASS2')
                    else:
                        os.putenv('_MEIPASS2', '')

    # Second override 'Popen' class with our modified version.
    forking.Popen = _Popen
# END Fix Windows multiprocessing
import multiprocessing
import six
from os.path import isfile
from ast import literal_eval
import argparse

import oddt
from oddt.scoring import scorer


def _build_sf_choices():
    """Return the names of built-in scoring functions accepted by --score.

    Covers generic names ('autodock_vina', 'rfscore', 'nnscore'), versioned
    RFScore/PLECscore variants, and variants trained on specific PDBbind
    releases (e.g. 'rfscore_v2_pdbbind2016').
    """
    sf_choices = ['autodock_vina', 'rfscore', 'nnscore']
    for v in [1, 2, 3]:
        sf_choices.append('rfscore_v%i' % v)
    for v in ['linear', 'nn', 'rf']:
        sf_choices.append('plec%s' % v)

    for pdbbind_version in [2007, 2012, 2013, 2014, 2015, 2016]:
        for v in [1, 2, 3]:
            sf_choices.append('rfscore_v%i_pdbbind%i' % (v, pdbbind_version))
        sf_choices.append('nnscore_pdbbind%i' % (pdbbind_version))

    # PLECscore is supported only for v2016+
    for pdbbind_version in [2016]:
        for v in ['linear', 'nn', 'rf']:
            sf_choices.append('plec%s_pdbbind%i' % (v, pdbbind_version))
    return sf_choices


def _build_parser():
    """Construct the argparse parser for the ODDT command line tool."""
    parser = argparse.ArgumentParser(
        description='Open Drug Discovery (ODDT) command line tools')
    parser.add_argument('--toolkit',
                        dest='toolkit',
                        choices=['ob', 'rdk'],
                        default='ob',
                        help=('Choose which toolkit should be used for '
                              'calculations, either "ob" (OpenBabel) or '
                              '"rdk" (RDKit) (default: ob)'))
    parser.add_argument('-n', '--n_cpu',
                        dest='n_cpu',
                        type=int,
                        # explicit default; previously None was silently
                        # passed to the pipeline despite the documented -1
                        default=-1,
                        help=('The number of parallel processes. '
                              '-1 automatically assigns maximum number of CPUs.'
                              ' (default=-1)'))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + oddt.__version__)

    parser.add_argument('-c', '--chunksize',
                        dest='chunksize',
                        type=int,
                        default=100,
                        help=('The number of molecules to process in a chunk. '
                              ' (default=100)'))

    # in/out files and formats
    parser.add_argument('in_file', nargs='+',
                        help='Input files of formats supported by toolkit.')
    parser.add_argument('-i', dest='in_format', help='Input file(s) format')
    parser.add_argument('-o', dest='out_format', help='Output file format')
    parser.add_argument('-O', '--output', dest='out_file', help='Output file')

    # filter
    group = parser.add_argument_group('Filtering')
    group.add_argument('--filter',
                       dest='filter',
                       action='append',
                       default=[],
                       help=('Choose built-in filters to be used (eg. "ro5", '
                             '"ro3", "pains")'))

    # fingerprints
    group = parser.add_argument_group('Similarity searching')
    group.add_argument('--similarity',
                       dest='similarity',
                       action='append',
                       default=[],
                       choices=['ifp', 'sifp', 'usr', 'usr_cat', 'electroshape'],
                       help='Choose similarity method to use (eg. "ifp", "sifp", '
                       '"usr", "usr_cat", "electroshape")')

    group.add_argument('--cutoff',
                       dest='cutoff',
                       type=float,
                       default=0.9,
                       help=('Similarity cutoff below which molecules will be'
                             ' ignored.'))

    group.add_argument('--query',
                       dest='query',
                       action='append',
                       help='Query molecule(s) for similarity searching')

    # docking
    group = parser.add_argument_group('Protein-Ligand docking')
    group.add_argument('--dock',
                       dest='dock',
                       choices=['autodock_vina'],
                       help='Choose docking software to be used')
    group.add_argument('--receptor', help='Protein file')
    group.add_argument('--auto_ligand',
                       help='Docking Box is determined on that ligand')
    group.add_argument('--center', type=literal_eval,
                       help='Docking Box center (x,y,z)')
    group.add_argument('--size', type=literal_eval,
                       help='Docking Box dimensions (x,y,z)')
    group.add_argument('--exhaustiveness', default=8, type=int,
                       help='Exhaustiveness of docking')
    group.add_argument('--seed', help='Random Seed')

    # scoring
    group = parser.add_argument_group('Rescoring')
    group.add_argument('--score',
                       dest='score',
                       choices=_build_sf_choices(),
                       action='append',
                       default=[],
                       help='Choose built-in scoring function to be used')
    group.add_argument('--score_file',
                       dest='score_file',
                       action='append',
                       default=[],
                       help='Choose ODDT scoring function saved to file (pickle)')

    parser.add_argument('--field',
                        dest='save_fields',
                        action='append',
                        default=[],
                        help=('Field to save (eg. in CSV). Each field should be'
                              ' specified separately.'))
    return parser


def _guess_format(path):
    """Guess a molecule file format from *path*'s extension.

    A trailing '.gz' is skipped, so 'ligands.sdf.gz' yields 'sdf'.
    """
    parts = path.split('.')
    return parts[-2] if parts[-1] == 'gz' else parts[-1]


def main():
    """Parse command line arguments and run the virtual screening pipeline.

    Raises:
        IOError: when an input or pickle file does not exist.
        ValueError: when similarity searching lacks a query, or no output
            format/file is specified.
    """
    args = _build_parser().parse_args()

    # Switch toolkits: an explicit --toolkit rdk or the ODDT_TOOLKIT
    # environment variable selects RDKit; OpenBabel is the fallback.
    if args.toolkit == 'rdk' or os.environ.get('ODDT_TOOLKIT') == 'rdk':
        from oddt.toolkits import rdk
        oddt.toolkit = rdk
    else:  # OB as fallback
        from oddt.toolkits import ob
        oddt.toolkit = ob

    # Import deferred until after the toolkit is selected.
    from oddt.virtualscreening import virtualscreening as vs

    # Create pipeline for docking and rescoring
    pipeline = vs(n_cpu=args.n_cpu, chunksize=args.chunksize)
    for in_file in args.in_file:
        if not isfile(in_file):
            raise IOError("File does not exist: '%s'" % in_file)
        fmt = args.in_format if args.in_format else _guess_format(in_file)
        pipeline.load_ligands(fmt, in_file)  # add loading ligands from STDIN?

    # Filter ligands (avoid shadowing the builtin `filter`)
    for filter_name in args.filter:
        pipeline.apply_filter(filter_name)

    receptor = None  # Not all similarity methods require receptor/protein
    # load protein once
    if args.receptor:
        extension = args.receptor.split('.')[-1]
        receptor = next(oddt.toolkit.readfile(extension, args.receptor))
        receptor.protein = True

    query = None
    if args.query:
        query = [next(oddt.toolkit.readfile(q.split('.')[-1], q))
                 for q in args.query]

    if args.similarity and query is None:
        # Previously an unbound-name crash; fail with an actionable message.
        raise ValueError('Similarity searching requires at least one '
                         '--query molecule.')
    for sim in args.similarity:
        pipeline.similarity(sim, query, protein=receptor, cutoff=args.cutoff)

    # Docking
    if args.dock == 'autodock_vina':
        kwargs = {}
        if args.center:
            kwargs['center'] = args.center
        if args.size:
            kwargs['size'] = args.size
        if args.auto_ligand:
            kwargs['auto_ligand'] = args.auto_ligand
        if args.exhaustiveness:
            kwargs['exhaustiveness'] = args.exhaustiveness
        if args.seed:
            kwargs['seed'] = args.seed
        pipeline.dock('autodock_vina', receptor, **kwargs)

    # Rescoring with built-in scoring functions
    for score in args.score:
        if score.startswith(('nnscore', 'rfscore', 'plec', 'autodock_vina')):
            pipeline.score(score, receptor)

    # Rescoring with pickled ODDT scoring functions
    for score_file in args.score_file:
        if not isfile(score_file):
            raise IOError('Could not read pickle file %s' % score_file)
        pipeline.score(scorer.load(score_file), receptor)

    # Write to file or STDOUT
    if args.out_file:
        if args.out_format:
            fmt = args.out_format
        else:  # autodiscover from the output file name
            fmt = _guess_format(args.out_file)
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(args.out_file, fields=args.save_fields)
        else:
            pipeline.write(fmt, args.out_file)
    else:
        fmt = args.out_format
        if not fmt:
            raise ValueError('No output format nor output file specified.')
        if fmt == 'csv':
            pipeline.write_csv(sys.stdout, fields=args.save_fields)
        else:
            for lig in pipeline.fetch():
                sys.stdout.write(lig.write(fmt))


if __name__ == '__main__':
    # On Windows calling this function is necessary.
    # On Linux/OSX it does nothing.
    # (It lets frozen Windows executables start multiprocessing children
    # without re-executing the whole command line script.)
    multiprocessing.freeze_support()
    main()