284 lines (250 with data), 10.7 kB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
# FIX Windows multiprocessing
# The multiprocessing package was reorganized in Python 3.4, so look up the
# module that provides ``Popen`` for this interpreter and platform.
try:
    # Python 3.4+ layout
    if sys.platform.startswith('win'):
        from multiprocessing import popen_spawn_win32 as forking
    else:
        from multiprocessing import popen_fork as forking
except ImportError:
    # Older layout: a single ``forking`` module
    from multiprocessing import forking

if sys.platform.startswith('win'):
    # Step 1: a Popen subclass that exports _MEIPASS2 while the child
    # process is being created.
    class _Popen(forking.Popen):
        def __init__(self, *args, **kwargs):
            is_frozen = hasattr(sys, 'frozen')
            if is_frozen:
                # The original _MEIPASS2 value has to be restored from
                # sys._MEIPASS for --onefile mode to work.
                os.putenv('_MEIPASS2', sys._MEIPASS)
            try:
                super(_Popen, self).__init__(*args, **kwargs)
            finally:
                if is_frozen:
                    # 'os.unsetenv()' is missing on some platforms
                    # (e.g. AIX). There the variable cannot be removed, only
                    # blanked out; the bootloader copes with an empty value.
                    unsetenv = getattr(os, 'unsetenv', None)
                    if unsetenv is None:
                        os.putenv('_MEIPASS2', '')
                    else:
                        unsetenv('_MEIPASS2')

    # Step 2: make multiprocessing use the subclass instead of its own Popen.
    forking.Popen = _Popen
# END Fix Windows multiprocessing
import multiprocessing
import six
from os.path import isfile
from ast import literal_eval
import argparse
import oddt
from oddt.scoring import scorer
def _scoring_function_choices():
    """Return the names accepted by ``--score``, in help-display order."""
    rfscore_versions = [1, 2, 3]
    plec_variants = ['linear', 'nn', 'rf']

    choices = ['autodock_vina', 'rfscore', 'nnscore']
    choices.extend('rfscore_v%i' % v for v in rfscore_versions)
    choices.extend('plec%s' % v for v in plec_variants)
    for pdbbind_version in [2007, 2012, 2013, 2014, 2015, 2016]:
        choices.extend('rfscore_v%i_pdbbind%i' % (v, pdbbind_version)
                       for v in rfscore_versions)
        choices.append('nnscore_pdbbind%i' % pdbbind_version)
    # PLECscore is supported only for v2016+
    for pdbbind_version in [2016]:
        choices.extend('plec%s_pdbbind%i' % (v, pdbbind_version)
                       for v in plec_variants)
    return choices


def _build_parser():
    """Create the ``argparse`` parser describing every command line option."""
    parser = argparse.ArgumentParser(
        description='Open Drug Discovery (ODDT) command line tools')
    parser.add_argument('--toolkit',
                        dest='toolkit',
                        choices=['ob', 'rdk'],
                        default='ob',
                        help=('Choose which toolkit should be used for '
                              'calculations, either "ob" (OpenBabel) or '
                              '"rdk" (RDKit) (default: ob)'))
    parser.add_argument('-n', '--n_cpu',
                        dest='n_cpu',
                        type=int,
                        # Explicit default so the value advertised in the
                        # help text is what the pipeline actually receives.
                        default=-1,
                        help=('The number of parallel processes. '
                              '-1 automatically assigns maximum number of CPUs.'
                              ' (default=-1)'))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + oddt.__version__)
    parser.add_argument('-c', '--chunksize',
                        dest='chunksize',
                        type=int,
                        default=100,
                        help=('The number of molecules to process in a chunk. '
                              ' (default=100)'))

    # in/out files and formats
    parser.add_argument('in_file', nargs='+',
                        help='Input files of formats supported by toolkit.')
    parser.add_argument('-i', dest='in_format', help='Input file(s) format')
    parser.add_argument('-o', dest='out_format', help='Output file format')
    parser.add_argument('-O', '--output', dest='out_file', help='Output file')

    # filter
    group = parser.add_argument_group('Filtering')
    group.add_argument('--filter',
                       dest='filter',
                       action='append',
                       default=[],
                       help=('Choose built-in filters to be used (eg. "ro5", '
                             '"ro3", "pains")'))

    # fingerprints
    group = parser.add_argument_group('Similarity searching')
    group.add_argument('--similarity',
                       dest='similarity',
                       action='append',
                       default=[],
                       choices=['ifp', 'sifp', 'usr', 'usr_cat',
                                'electroshape'],
                       help='Choose similarity method to use (eg. "ifp", '
                            '"sifp", "usr", "usr_cat", "electroshape")')
    group.add_argument('--cutoff',
                       dest='cutoff',
                       type=float,
                       default=0.9,
                       help=('Similarity cutoff below which molecules will be'
                             ' ignored.'))
    group.add_argument('--query',
                       dest='query',
                       action='append',
                       help='Query molecule(s) for similarity searching')

    # docking
    group = parser.add_argument_group('Protein-Ligand docking')
    group.add_argument('--dock',
                       dest='dock',
                       choices=['autodock_vina'],
                       help='Choose docking software to be used')
    group.add_argument('--receptor', help='Protein file')
    group.add_argument('--auto_ligand',
                       help='Docking Box is determined on that ligand')
    group.add_argument('--center', type=literal_eval,
                       help='Docking Box center (x,y,z)')
    group.add_argument('--size', type=literal_eval,
                       help='Docking Box dimensions (x,y,z)')
    group.add_argument('--exhaustiveness', default=8, type=int,
                       help='Exhaustiveness of docking')
    group.add_argument('--seed', help='Random Seed')

    # scoring
    group = parser.add_argument_group('Rescoring')
    group.add_argument('--score',
                       dest='score',
                       choices=_scoring_function_choices(),
                       action='append',
                       default=[],
                       help='Choose built-in scoring function to be used')
    group.add_argument('--score_file',
                       dest='score_file',
                       action='append',
                       default=[],
                       help='Choose ODDT scoring function saved to file '
                            '(pickle)')

    parser.add_argument('--field',
                        dest='save_fields',
                        action='append',
                        default=[],
                        help=('Field to save (eg. in CSV). Each field should be'
                              ' specified separately.'))
    return parser


def _guess_format(filename):
    """Return the file format implied by the extension of *filename*.

    A trailing ``.gz`` is skipped, so ``ligands.sdf.gz`` yields ``sdf``.
    A name without any dot is returned unchanged.
    """
    parts = filename.split('.')
    if parts[-1] == 'gz' and len(parts) > 1:
        return parts[-2]
    return parts[-1]


def main():
    """Run the ODDT command line pipeline described by ``sys.argv``.

    The steps are executed in a fixed order: select the toolkit, load the
    ligands, apply filters, run similarity searches, dock, rescore and
    finally write the results to a file or to standard output.

    Raises:
        IOError: if an input file or a ``--score_file`` does not exist.
        ValueError: if neither an output format nor an output file is given.
        SystemExit: raised by argparse for invalid arguments, including
            ``--similarity`` given without any ``--query``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Similarity searching needs something to compare against; report it as
    # a usage error up front instead of failing later on an unbound name.
    if args.similarity and not args.query:
        parser.error('--similarity requires at least one --query molecule')

    # Switch toolkits; the ODDT_TOOLKIT environment variable can also
    # request RDKit.
    if args.toolkit == 'rdk' or os.environ.get('ODDT_TOOLKIT') == 'rdk':
        from oddt.toolkits import rdk
        oddt.toolkit = rdk
    else:  # OB as fallback
        from oddt.toolkits import ob
        oddt.toolkit = ob

    # Import the pipeline after the toolkit has been selected.
    from oddt.virtualscreening import virtualscreening as vs

    # Create pipeline for docking and rescoring
    pipeline = vs(n_cpu=args.n_cpu, chunksize=args.chunksize)

    for in_file in args.in_file:
        if not isfile(in_file):
            raise IOError("File does not exist: '%s'" % in_file)
        # An explicit -i format wins; otherwise autodiscover from the name.
        in_format = args.in_format or _guess_format(in_file)
        pipeline.load_ligands(in_format, in_file)

    # Filter ligands
    for filter_name in args.filter:
        pipeline.apply_filter(filter_name)

    receptor = None  # Not all similarity methods require receptor/protein
    # load protein once
    if args.receptor:
        extension = args.receptor.split('.')[-1]
        receptor = six.next(oddt.toolkit.readfile(extension, args.receptor))
        receptor.protein = True

    # Only the first molecule of every query file is used.
    query = [six.next(oddt.toolkit.readfile(q.split('.')[-1], q))
             for q in (args.query or [])]
    for sim in args.similarity:
        pipeline.similarity(sim, query, protein=receptor, cutoff=args.cutoff)

    # Docking
    if args.dock == 'autodock_vina':
        # Forward only the options that were actually set.
        dock_kwargs = {}
        for option in ('center', 'size', 'auto_ligand', 'exhaustiveness',
                       'seed'):
            value = getattr(args, option)
            if value:
                dock_kwargs[option] = value
        pipeline.dock('autodock_vina', receptor, **dock_kwargs)

    # Rescoring
    builtin_prefixes = ('nnscore', 'rfscore', 'plec', 'autodock_vina')
    for score in args.score:
        if score.startswith(builtin_prefixes):
            pipeline.score(score, receptor)

    for score_file in args.score_file:
        if not isfile(score_file):
            raise IOError('Could not read pickle file %s' % score_file)
        # NOTE: scoring functions are stored as pickles (see --score_file
        # help); only load files that come from a trusted source.
        pipeline.score(scorer.load(score_file), receptor)

    # Write to file or STDOUT
    out_format = args.out_format
    if not out_format and args.out_file:
        out_format = _guess_format(args.out_file)
    if not out_format:
        raise ValueError('No output format nor output file specified.')

    if out_format == 'csv':
        pipeline.write_csv(args.out_file or sys.stdout,
                           fields=args.save_fields)
    elif args.out_file:
        pipeline.write(out_format, args.out_file)
    else:
        for lig in pipeline.fetch():
            sys.stdout.write(lig.write(out_format))
if __name__ == '__main__':
    # freeze_support() must be called on Windows; on Linux/OSX the call
    # does nothing.
    multiprocessing.freeze_support()
    main()