[4c33d4]: / bin / exseek

Download this file

299 lines (275 with data), 13.1 kB

#! /usr/bin/env python
import argparse, sys, os, errno
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s')
import yaml
import shutil
import shlex
import subprocess
import re
import exseek
import pkg_resources

# Names of the workflow steps known to this script. A step listed here that
# has no dedicated branch in the dispatch code of the main program is run
# from snakefiles/<step>.snakemake inside the package.
# NOTE(review): 'build_index' is listed here while the dispatch code checks
# for 'create_index' -- confirm which name is intended.
steps = (
    'quality_control',
    'quality_control_clean',
    'cutadapt', 
    'bigwig',
    'mapping',
    'count_matrix', 
    'call_domains', 
    'normalization', 
    'feature_selection', 
    'update_singularity_wrappers',
    'build_index'
)

# Directory of the installed exseek package; snakefiles, scripts, templates
# and the packaged config directory are looked up relative to it.
# (A previous os.path.dirname(exseek.__file__) assignment was immediately
# overwritten by this one and has been dropped.)
package_dir = pkg_resources.resource_filename('exseek', '/')
# use current directory as working directory by default
root_dir = os.getcwd()
# directories searched for configuration files; filled in by the main program
config_dirs = []

def quoted_string_join(strs, sep=' '):
    """Join strings for display, double-quoting the ambiguous ones.

    A string is wrapped in double quotes when it is empty or contains any
    whitespace (including leading or trailing whitespace), so that the
    boundaries of each argument remain visible in the joined result.

    Parameters
    ----------
    strs : iterable of str
        Strings to join (e.g. the arguments of a command line).
    sep : str
        Separator placed between the (possibly quoted) strings.

    Returns
    -------
    str
        The joined string; empty when `strs` is empty.
    """
    def quote(s):
        # an empty argument would vanish and a padded one would blend into
        # the separator unless it is quoted
        if not s or any(ch.isspace() for ch in s):
            return '"' + s + '"'
        return s

    return sep.join(quote(s) for s in strs)

def get_config_file(filename):
    """Locate a configuration file in the configured search directories.

    The directories in the module-level `config_dirs` list are tried in
    order; the path inside the first directory that contains a regular file
    named `filename` is returned. Returns None when no directory has it.
    """
    candidates = (os.path.join(directory, filename) for directory in config_dirs)
    return next((path for path in candidates if os.path.isfile(path)), None)

def execute_snakemake(exec_method='execv'):
    """Assemble the snakemake command line and run it.

    Uses the module-level state prepared by the main program:
    snakemake_args, snakefile, configfile, extra_args, extra_config, args,
    config, logger and snakemake_path.

    Parameters
    ----------
    exec_method : str
        'execv' replaces the current process with snakemake (does not
        return on success); 'check_call' runs snakemake as a child process
        and returns once it has exited successfully.

    Raises
    ------
    ValueError
        If `exec_method` is neither 'execv' nor 'check_call'.
    subprocess.CalledProcessError
        If snakemake exits with a non-zero status under 'check_call'.
    """
    if exec_method not in ('execv', 'check_call'):
        # previously an unknown method silently ran nothing at all
        raise ValueError('unknown exec_method: {}'.format(exec_method))

    # Build the command in a local list. The shared snakemake_args list is
    # left untouched so that a second call (the 'mapping' step runs snakemake
    # twice) does not accumulate duplicated --snakefile/--configfile/--config
    # options from the first call.
    command = [str(s) for s in
        snakemake_args + ['--snakefile', snakefile, '--configfile', configfile]]
    # extra args
    command += extra_args

    # set root_dir and bin_dir
    extra_config['package_dir'] = package_dir
    extra_config['bin_dir'] = os.path.join(package_dir, 'scripts')
    extra_config['root_dir'] = root_dir
    extra_config['dataset'] = args.dataset
    extra_config['config_dirs'] = ':'.join(config_dirs)

    if args.singularity:
        # generate the container wrappers on first use
        if not os.path.isdir(config['container']['wrapper_dir']):
            update_singularity_wrappers()
        logger.info('enable singularity')
        extra_config['use_singularity'] = True

    # extra config, passed to snakemake as key=value pairs
    command += ['--config'] + ['{}={}'.format(key, val) for key, val in extra_config.items()]
    logger.info('run snakemake: {}'.format(quoted_string_join(command)))
    # run snakemake
    if exec_method == 'execv':
        os.execv(snakemake_path, command)
    else:
        subprocess.check_call(command, shell=False)

if __name__ == '__main__':
    # ---- command line ----
    parser = argparse.ArgumentParser(description='exSeek main program')

    parser.add_argument('step', type=str)
    parser.add_argument('--dataset', '-d', type=str, required=True,
        help='dataset name')
    parser.add_argument('--workdir', '-w', type=str,
        help='working directory')
    parser.add_argument('--config-dir', '-c', type=str,
        help='directory for configuration files')
    parser.add_argument('--cluster', action='store_true', help='submit to cluster')
    parser.add_argument('--singularity', action='store_true',
        help='use singularity')
    # arguments not recognised here are collected in extra_args and later
    # forwarded to snakemake
    args, extra_args = parser.parse_known_args()
    # default number of jobs: add '-j 1' unless a jobs/cores option was given
    jobs_option = re.compile(r'-j[0-9]*|--cores[0-9]*|--jobs[0-9]*')
    is_set_jobs = any(jobs_option.match(arg) is not None for arg in extra_args)
    if not is_set_jobs:
        extra_args += ['-j', '1']

    logger = logging.getLogger('exseek')

    snakefile = None

    if args.workdir is not None:
        root_dir = args.workdir
    logger.info('root directory: {}'.format(root_dir))

    # ---- configuration search path (earlier entries win in get_config_file) ----
    config_dirs.append(os.path.join(package_dir, 'config'))
    config_dirs.append(os.path.join(root_dir, 'config'))
    if args.config_dir is not None:
        config_dirs.append(args.config_dir)
    elif os.path.isdir('config'):
        config_dirs.append('config')

    logger.info('read default config file')
    default_config_file = get_config_file('default_config.yaml')
    if default_config_file is None:
        # fail with a clear message instead of open(None) raising TypeError
        raise ValueError('cannot find configuration file: default_config.yaml')
    with open(default_config_file, 'r') as f:
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # recent PyYAML releases; an empty file yields None, hence `or {}`
        default_config = yaml.safe_load(f) or {}

    # find snakemake executable
    snakemake_path = shutil.which('snakemake')
    if snakemake_path is None:
        raise ValueError('cannot find snakemake command')

    # snakemake command
    snakemake_args = [snakemake_path, '-k', '--rerun-incomplete']
    extra_config = {}
    # check configuration file
    configfile = get_config_file('{}.yaml'.format(args.dataset))
    if configfile is None:
        raise ValueError('cannot find configuration file: {} '.format('{}.yaml'.format(args.dataset)))
    logger.info('read user config file: ' + configfile)
    with open(configfile, 'r') as f:
        user_config = yaml.safe_load(f) or {}
    # user settings override the defaults (config aliases default_config)
    config = default_config
    config.update(user_config)
    # check cluster configuration
    if args.cluster:
        cluster_config = get_config_file('cluster.yaml')
        # get_config_file returns None when the file is missing; the former
        # os.path.isfile(None) / args.cluster_config checks raised
        # TypeError / AttributeError instead of this message
        if cluster_config is None:
            raise ValueError('cannot find cluster.yaml')

        cluster_command = config.get('cluster_command')
        if cluster_command is None:
            raise ValueError('variable cluster_command is not defined in the configuration file')
        snakemake_args += ['--cluster', cluster_command, '--cluster-config', cluster_config]
    
    def update_sequential_mapping():
        """Regenerate snakefiles/sequential_mapping.snakemake inside the package.

        Runs scripts/generate_snakemake.py with the RNA types from the
        configuration and the packaged sequential_mapping template.
        Raises subprocess.CalledProcessError if the generator fails.
        """
        output_path = os.path.join(package_dir, 'snakefiles', 'sequential_mapping.snakemake')
        logger.info('generate sequential_mapping.snakemake')
        generator = os.path.join(package_dir, 'scripts', 'generate_snakemake.py')
        rna_types = ','.join(config['rna_types'])
        template = os.path.join(package_dir, 'templates', 'sequential_mapping.snakemake')
        command = [
            generator, 'sequential_mapping',
            '--rna-types', rna_types,
            '--template', template,
            '-o', output_path,
        ]
        logger.info('run ' + ' '.join(command))
        subprocess.check_call(command, shell=False)
    
    def generate_sequential_mapping_snakefile():
        """Generate sequential_mapping.snakemake in the configured output directory.

        Runs scripts/generate_snakemake.py with the RNA types from the
        configuration, the packaged subworkflow template and the packaged
        common.snakemake. Raises subprocess.CalledProcessError if the
        generator fails.

        Returns the path of the generated snakefile.
        """
        output_path = os.path.join(config['output_dir'], 'sequential_mapping.snakemake')
        logger.info('generate sequential_mapping.snakemake')
        generator = os.path.join(package_dir, 'scripts', 'generate_snakemake.py')
        rna_types = ','.join(config['rna_types'])
        template = os.path.join(package_dir, 'templates', 'sequential_mapping_subworkflow.snakemake')
        common = os.path.join(package_dir, 'snakefiles', 'common.snakemake')
        command = [
            generator, 'sequential_mapping',
            '--rna-types', rna_types,
            '--template', template,
            '--common-snakemake', common,
            '-o', output_path,
        ]
        logger.info('run ' + ' '.join(command))
        subprocess.check_call(command, shell=False)
        return output_path
        
    def update_singularity_wrappers():
        """Generate the container wrapper scripts for the configured backend.

        The backend is read from config['container']['backend'] and must be
        'singularity', 'udocker' or 'docker'. Its executable comes from the
        '<backend>_path' setting, falling back to a PATH lookup, and its image
        from the '<backend>_image' setting. The wrappers are written by
        scripts/make_singularity_wrappers.py into config['container']['wrapper_dir'].

        Raises ValueError for an unknown backend or a missing executable, and
        subprocess.CalledProcessError if the wrapper script fails.
        """
        backend = config.get('container', {}).get('backend')
        if backend not in ('singularity', 'udocker', 'docker'):
            raise ValueError('unknown container backend: {}'.format(backend))

        container_settings = config.get('container', {})
        # an explicitly configured path takes precedence over the PATH lookup
        executable = container_settings.get(backend + '_path')
        if executable is None:
            executable = shutil.which(backend)
        if executable is None:
            raise ValueError('cannot find {} executable'.format(backend))
        image = container_settings.get(backend + '_image')

        logger.info('generate container wrappers')
        wrapper_script = os.path.join(package_dir, 'scripts', 'make_singularity_wrappers.py')
        exports_file = os.path.join(package_dir, 'singularity', 'exports.txt')
        subprocess.check_call([
            'python', wrapper_script,
            '--image', image,
            '--list-file', exports_file,
            '--backend', backend,
            '--backend-executable', executable,
            '-o', config['container']['wrapper_dir'],
        ], shell=False)
        
    # Select the snakefile for the requested step, then hand over to snakemake.
    # (A stale triple-quoted copy of execute_snakemake's body that used to
    # follow the final call has been removed; it was never executed.)
    def packaged_snakefile(name):
        """Return the path of snakefiles/<name>.snakemake inside the package."""
        return os.path.join(package_dir, 'snakefiles', name + '.snakemake')

    step = args.step
    if step in ('quality_control', 'cutadapt', 'quality_control_clean'):
        # separate snakefiles for paired-end (_pe) and single-end (_se) data
        snakefile = packaged_snakefile(step + ('_pe' if config['paired_end'] else '_se'))
    elif step in ('create_index', 'count_matrix', 'bigwig'):
        # separate snakefiles for small (_small) and long (_long) RNA-seq data
        snakefile = packaged_snakefile(step + ('_small' if config['small_rna'] else '_long'))
    elif step == 'sequential_mapping':
        snakefile = generate_sequential_mapping_snakefile()
    elif step == 'mapping':
        if config['small_rna']:
            # first run sequential_mapping as a child process, then continue
            # with the small RNA mapping snakefile
            snakefile = generate_sequential_mapping_snakefile()
            execute_snakemake(exec_method='check_call')
            snakefile = packaged_snakefile('mapping_small')
        elif config['paired_end']:
            snakefile = packaged_snakefile('mapping_long_pe')
        else:
            snakefile = packaged_snakefile('mapping_long_se')
    elif step == 'combine_domains':
        if not config['small_rna']:
            raise ValueError('combine_domains can only be applied to small RNA-seq data')
        snakefile = packaged_snakefile('combine_domains_with_small')
    elif step == 'update_sequential_mapping':
        # maintenance step: no snakemake run
        if config['small_rna']:
            update_sequential_mapping()
        sys.exit(0)
    elif step == 'update_singularity_wrappers':
        # NOTE(review): --singularity is a store_true flag, so it is False
        # (never None) when omitted and this check cannot fire. Behaviour is
        # left unchanged; confirm whether the flag should really be required.
        if args.singularity is None:
            raise ValueError('argument --singularity is required for step: update-singularity-wrappers')
        update_singularity_wrappers()
        sys.exit(0)
    elif step == 'call_domains':
        if not config['small_rna']:
            raise ValueError('call_domains can only be applied to small RNA-seq data')
        snakefile = packaged_snakefile('call_domains')
    elif step in steps:
        # generic steps map directly to snakefiles/<step>.snakemake
        snakefile = packaged_snakefile(step)
    else:
        raise ValueError('unknown step: ' + args.step)
    execute_snakemake()