[3b722e]: / bin / oddt_debug

Download this file

103 lines (87 with data), 3.7 kB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import sys
import argparse
import csv
import gzip

import six

import oddt

# arguments
# Command-line interface: toolkit selection, input/output files and the
# descriptor set to compute. The parsed result is stored in ``args``.
parser = argparse.ArgumentParser(description='Open Drug Discovery (ODDT) command line tools')
parser.add_argument('--toolkit',
                    dest='toolkit',
                    choices=['ob', 'rdk'],
                    default='ob',
                    # The accepted value is "rdk" (see ``choices``), so the help
                    # text must advertise exactly that spelling.
                    help='Choose which toolkit should be used for calculations, '
                         'either "ob" (OpenBabel) or "rdk" (RDKit) (default: ob)')
parser.add_argument('--version', action='version', version='%(prog)s ' + oddt.__version__)

# in/out files and formats
parser.add_argument('in_file', nargs='+', help='Input files of formats supported by toolkit.')
parser.add_argument('-i', dest='in_format', help='Input file(s) format')
parser.add_argument('-O', '--output', dest='out_file', help='Output file')

# descriptors
group = parser.add_argument_group('Generate CSV from descriptors, scoring functions and docking software')
group.add_argument('--descriptors',
                   choices=['autodock_vina',
                            'oddt_vina',
                            'nnscore',
                            'binana',
                            'rfscore_v1',
                            'rfscore_v2',
                            'rfscore_v3',
                            ],
                   help='Choose which descriptors should be generated',
                   )
group.add_argument('--receptor', help='Protein file')

args = parser.parse_args()


# Switch toolkits
# Install the backend named by --toolkit as ``oddt.toolkit``; any other value
# (or a missing attribute) leaves the current toolkit untouched.
selected_toolkit = getattr(args, 'toolkit', None)
if selected_toolkit == 'ob':
    from oddt.toolkits import ob
    oddt.toolkit = ob
elif selected_toolkit == 'rdk':
    from oddt.toolkits import rdk
    oddt.toolkit = rdk

# load protein once
# Default to None so that the descriptor constructors further down receive
# ``protein=None`` instead of failing with a NameError when --receptor is
# not given.
receptor = None
if args.receptor:
    # Derive the format from the file name. For compressed files such as
    # "protein.pdb.gz" use the extension preceding ".gz", the same rule the
    # input-file loop applies when autodiscovering ligand formats.
    receptor_parts = args.receptor.split('.')
    if receptor_parts[-1] == 'gz' and len(receptor_parts) > 2:
        extension = receptor_parts[-2]
    else:
        extension = receptor_parts[-1]
    try:
        # Only the first molecule of the file is used.
        receptor = six.next(oddt.toolkit.readfile(extension, args.receptor))
    except StopIteration:
        # The reader yielded nothing: report it as a usage error rather than
        # letting a bare StopIteration escape.
        parser.error('No molecule could be read from receptor file: %s' % args.receptor)
    # Mark the loaded molecule as a protein.
    receptor.protein = True

# Load descriptor generator
# Start from None so the final check works even when no branch matches.
descriptor_generator = None
# --descriptors is optional for argparse, so it may be None here; fall back to
# an empty string so the comparisons below are safe and simply do not match.
descriptor_name = args.descriptors or ''
if descriptor_name.startswith('rfscore'):
    from oddt.scoring.functions import rfscore
    # 'rfscore_v<N>' -> integer N
    rfscore_version = int(descriptor_name.split('_')[1][1:])
    descriptor_generator = rfscore(protein=receptor, version=rfscore_version).descriptor_generator
elif descriptor_name == 'nnscore':
    from oddt.scoring.functions import nnscore
    descriptor_generator = nnscore(protein=receptor).descriptor_generator
elif descriptor_name == 'binana':
    from oddt.scoring.descriptors.binana import binana_descriptor
    descriptor_generator = binana_descriptor(protein=receptor)
elif descriptor_name == 'autodock_vina':
    from oddt.scoring.descriptors import autodock_vina_descriptor
    descriptor_generator = autodock_vina_descriptor(protein=receptor)
elif descriptor_name == 'oddt_vina':
    from oddt.scoring.descriptors import oddt_vina_descriptor
    descriptor_generator = oddt_vina_descriptor(protein=receptor)

# Explicit check instead of ``assert``: assertions are stripped under -O, and
# a usage error with the help banner is more useful than a traceback.
if descriptor_generator is None:
    parser.error('No descriptor generator has been chosen.')

# Open the output destination: a plain or gzip-compressed file when
# -O/--output is given, standard output otherwise.
if args.out_file:
    # Rows are written with print(), which emits text (str). On Python 3 a
    # binary-mode file rejects str, so text mode is required there; on
    # Python 2 str is bytes and binary mode is kept.
    out_mode = 'wt' if six.PY3 else 'wb'
    if args.out_file.split('.')[-1] == 'gz':
        out_file = gzip.open(args.out_file, out_mode)
    else:
        out_file = open(args.out_file, out_mode)
else:
    out_file = sys.stdout

# Print header if possible
# Only generators exposing a ``titles`` attribute get a CSV header row.
if hasattr(descriptor_generator, 'titles'):
    header_row = ','.join(descriptor_generator.titles)
    print(header_row, file=out_file)

# Read files
# Each readable molecule of each input file becomes one CSV row of descriptors.
for in_file in args.in_file:
    if args.in_format:
        # An explicit -i format applies to every input file. The parser stores
        # it as ``in_format``; there is no ``out_format`` attribute.
        fmt = args.in_format
    else:  # autodiscover from the file name
        name_parts = in_file.split('.')
        if name_parts[-1] == 'gz':
            # Compressed input, e.g. "ligands.sdf.gz": the format is the
            # extension preceding ".gz".
            fmt = name_parts[-2]
        else:
            fmt = name_parts[-1]
    for mol in oddt.toolkit.readfile(fmt, in_file):
        if mol:
            # build() is called with a one-element list; the first row of the
            # result holds the descriptors of this molecule.
            row = descriptor_generator.build([mol]).tolist()[0]
            print(','.join(map(str, row)), file=out_file)

# Close the output only when it is a file opened by this script; standard
# output is left open.
if out_file is not sys.stdout:
    out_file.close()