Diff of /bin/exseek [000000] .. [4c33d4]

Switch to side-by-side view

--- a
+++ b/bin/exseek
@@ -0,0 +1,299 @@
+#! /usr/bin/env python
+import argparse, sys, os, errno
+import logging
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s')
+import yaml
+import shutil
+import shlex
+import subprocess
+import re
+import exseek
+import pkg_resources
+
+steps = (
+    'quality_control',
+    'quality_control_clean',
+    'cutadapt', 
+    'bigwig',
+    'mapping',
+    'count_matrix', 
+    'call_domains', 
+    'normalization', 
+    'feature_selection', 
+    'update_singularity_wrappers',
+    'build_index'
+)
+
+# use current directory as working directory by default
+package_dir = os.path.dirname(exseek.__file__)
+package_dir = pkg_resources.resource_filename('exseek', '/')
+root_dir = os.getcwd()
+config_dirs = []
+
+def quoted_string_join(strs, sep=' '):
+    quoted = []
+    for s in strs:
+        if len(s.split()) > 1:
+            quoted.append('"' + s + '"')
+        else:
+            quoted.append(s)
+    return sep.join(quoted)
+
+def get_config_file(filename):
+    for config_dir in config_dirs:
+        if os.path.isfile(os.path.join(config_dir, filename)):
+            return os.path.join(config_dir, filename)
+
+def execute_snakemake(exec_method='execv'):
+    global snakemake_args
+
+    snakemake_args += ['--snakefile', snakefile, '--configfile', configfile]
+    # set root_dir and bin_dir
+    extra_config['package_dir'] = package_dir
+    extra_config['bin_dir'] = os.path.join(package_dir, 'scripts')
+    extra_config['root_dir'] = root_dir
+    extra_config['dataset'] = args.dataset
+    extra_config['config_dirs'] = ':'.join(config_dirs)
+    # extra args
+    snakemake_args = [str(s) for s in snakemake_args]
+    snakemake_args += extra_args
+
+    if args.singularity:
+        if not os.path.isdir(config['container']['wrapper_dir']):
+            update_singularity_wrappers()
+        logger.info('enable singularity')
+        extra_config['use_singularity'] = True
+    
+    # extra config
+    snakemake_args += ['--config'] + ['{}={}'.format(key, val) for key, val in extra_config.items()]
+    #subprocess.check_call(snakemake_args, shell=False)
+    logger.info('run snakemake: {}'.format(quoted_string_join(snakemake_args)))
+    # run snakemake
+    if exec_method == 'execv':
+        os.execv(snakemake_path, snakemake_args)
+    elif exec_method == 'check_call':
+        subprocess.check_call(snakemake_args, shell=False)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='exSeek main program')
+
+    parser.add_argument('step', type=str)
+    parser.add_argument('--dataset', '-d', type=str, required=True,
+        help='dataset name')
+    parser.add_argument('--workdir', '-w', type=str,
+        help='working directory')
+    parser.add_argument('--config-dir', '-c', type=str,
+        help='directory for configuration files')
+    parser.add_argument('--cluster', action='store_true', help='submit to cluster')
+    parser.add_argument('--singularity', action='store_true',
+        help='use singularity')
+    args, extra_args = parser.parse_known_args()
+    # default number of jobs
+    is_set_jobs = False
+    for arg in extra_args:
+        if (re.match(r'-j[0-9]*', arg) is not None) \
+            or (re.match(r'--cores[0-9]*', arg) is not None) \
+            or (re.match(r'--jobs[0-9]*', arg) is not None):
+            is_set_jobs = True
+            break
+    if not is_set_jobs:
+        extra_args += ['-j', '1']
+
+    logger = logging.getLogger('exseek')
+
+    snakefile = None
+    
+    if args.workdir is not None:
+        root_dir = args.workdir
+    logger.info('root directory: {}'.format(root_dir))
+
+    config_dirs.append(os.path.join(package_dir, 'config'))
+    config_dirs.append(os.path.join(root_dir, 'config'))
+    if args.config_dir is not None:
+        config_dirs.append(args.config_dir)
+    else:
+        if os.path.isdir('config'):
+            config_dirs.append('config')
+
+    logger.info('read default config file')
+    with open(get_config_file('default_config.yaml'), 'r') as f:
+        default_config = yaml.load(f)
+
+    # find snakemake executable
+    snakemake_path = shutil.which('snakemake')
+    if snakemake_path is None:
+        raise ValueError('cannot find snakemake command')
+
+    # snakemake command
+    snakemake_args = [snakemake_path, '-k', '--rerun-incomplete']
+    extra_config = {}
+    # check configuration file
+    configfile = get_config_file('{}.yaml'.format(args.dataset))
+    if configfile is None:
+        raise ValueError('cannot find configuration file: {} '.format('{}.yaml'.format(args.dataset)))
+    logger.info('read user config file: ' + configfile)
+    with open(configfile, 'r') as f:
+        config = default_config
+        user_config = yaml.load(f)
+        config.update(user_config)
+    # check cluster configuration
+    if args.cluster:
+        cluster_config = get_config_file('cluster.yaml')
+        if not os.path.isfile(cluster_config):
+            if args.cluster_config is None:
+                raise ValueError('cannot find cluster.yaml')
+
+        cluster_command = config.get('cluster_command')
+        if cluster_command is None:
+            raise ValueError('variable cluster_command is not defined in the configuration file')
+        snakemake_args += ['--cluster', cluster_command, '--cluster-config', cluster_config]
+    
+    def update_sequential_mapping():
+        snakefile = os.path.join(package_dir, 'snakefiles', 'sequential_mapping.snakemake')
+        logger.info('generate sequential_mapping.snakemake')
+        update_command = [os.path.join(package_dir, 'scripts', 'generate_snakemake.py'), 'sequential_mapping',
+                '--rna-types', ','.join(config['rna_types']), 
+                '--template', os.path.join(package_dir, 'templates', 'sequential_mapping.snakemake'),
+                '-o', snakefile]
+        logger.info('run ' + ' '.join(update_command))
+        subprocess.check_call(update_command, shell=False)
+    
+    def generate_sequential_mapping_snakefile():
+        snakefile = os.path.join(config['output_dir'], 'sequential_mapping.snakemake')
+        logger.info('generate sequential_mapping.snakemake')
+        update_command = [os.path.join(package_dir, 'scripts', 'generate_snakemake.py'), 'sequential_mapping',
+                '--rna-types', ','.join(config['rna_types']), 
+                '--template', os.path.join(package_dir, 'templates', 'sequential_mapping_subworkflow.snakemake'),
+                '--common-snakemake', os.path.join(package_dir, 'snakefiles', 'common.snakemake'),
+                '-o', snakefile]
+        logger.info('run ' + ' '.join(update_command))
+        subprocess.check_call(update_command, shell=False)
+        return snakefile
+        
+    def update_singularity_wrappers():
+        container_backend = config.get('container', {}).get('backend')
+        if container_backend == 'singularity':
+            backend_executable = config.get('container', {}).get('singularity_path')
+            if backend_executable is None:
+                backend_executable = shutil.which('singularity')
+            if backend_executable is None:
+                raise ValueError('cannot find singularity executable')
+            container_image = config.get('container', {}).get('singularity_image')
+        elif container_backend == 'udocker':
+            backend_executable = config.get('container', {}).get('udocker_path')
+            if backend_executable is None:
+                backend_executable = shutil.which('udocker')
+            if backend_executable is None:
+                raise ValueError('cannot find udocker executable')
+            container_image = config.get('container', {}).get('udocker_image')
+        elif container_backend == 'docker':
+            backend_executable = config.get('container', {}).get('docker_path')
+            if backend_executable is None:
+                backend_executable = shutil.which('docker')
+            if backend_executable is None:
+                raise ValueError('cannot find docker executable')
+            container_image = config.get('container', {}).get('docker_image')
+        else:
+            raise ValueError('unknown container backend: {}'.format(container_backend))
+        logger.info('generate container wrappers')
+        subprocess.check_call(['python', os.path.join(package_dir, 'scripts', 'make_singularity_wrappers.py'), 
+            '--image', container_image,
+            '--list-file', os.path.join(package_dir, 'singularity', 'exports.txt'),
+            '--backend', container_backend,
+            '--backend-executable', backend_executable,
+            '-o', config['container']['wrapper_dir']
+        ], shell=False)
+        
+    # find proper version of snakemake
+    if args.step == 'quality_control':
+        if config['paired_end']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'quality_control_pe.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'quality_control_se.snakemake')
+    elif args.step == 'create_index':
+        if config['small_rna']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'create_index_small.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'create_index_long.snakemake')
+    elif args.step == 'cutadapt':
+        if config['paired_end']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'cutadapt_pe.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'cutadapt_se.snakemake')
+    elif args.step == 'quality_control_clean':
+        if config['paired_end']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'quality_control_clean_pe.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'quality_control_clean_se.snakemake')
+    elif args.step == 'sequential_mapping':
+        snakefile = generate_sequential_mapping_snakefile()
+    elif args.step == 'mapping':
+        if config['small_rna']:
+            # first run sequential_mapping
+            snakefile = generate_sequential_mapping_snakefile()
+            execute_snakemake(exec_method='check_call')
+            snakefile = os.path.join(package_dir, 'snakefiles', 'mapping_small.snakemake')
+        else:
+            if config['paired_end']:
+                snakefile = os.path.join(package_dir, 'snakefiles', 'mapping_long_pe.snakemake')
+            else:
+                snakefile = os.path.join(package_dir, 'snakefiles', 'mapping_long_se.snakemake')
+    elif args.step == 'count_matrix':
+        if config['small_rna']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'count_matrix_small.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'count_matrix_long.snakemake')
+    elif args.step == 'combine_domains':
+        if config['small_rna']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'combine_domains_with_small.snakemake')
+        else:
+            raise ValueError('combine_domains can only be applied to small RNA-seq data')
+    elif args.step == 'update_sequential_mapping':
+        if config['small_rna']:
+            update_sequential_mapping()
+        sys.exit(0)
+    elif args.step == 'update_singularity_wrappers':
+        if args.singularity is None:
+            raise ValueError('argument --singularity is required for step: update-singularity-wrappers')
+        update_singularity_wrappers()
+        sys.exit(0)
+    elif args.step == 'bigwig':
+        if config['small_rna']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'bigwig_small.snakemake')
+        else:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'bigwig_long.snakemake')
+    elif args.step == 'call_domains':
+        if config['small_rna']:
+            snakefile = os.path.join(package_dir, 'snakefiles', 'call_domains.snakemake')
+        else:
+            raise ValueError('call_domains can only be applied to small RNA-seq data')
+    elif args.step in steps:
+        snakefile = os.path.join(package_dir, 'snakefiles', args.step + '.snakemake')
+    else:
+        raise ValueError('unknown step: ' + args.step)
+    execute_snakemake()
+    '''
+    snakemake_args += ['--snakefile', snakefile, '--configfile', configfile]
+    # set root_dir and bin_dir
+    extra_config['package_dir'] = package_dir
+    extra_config['bin_dir'] = os.path.join(package_dir, 'scripts')
+    extra_config['root_dir'] = root_dir
+    extra_config['dataset'] = args.dataset
+    extra_config['config_dirs'] = ':'.join(config_dirs)
+    # extra args
+    snakemake_args = [str(s) for s in snakemake_args]
+    snakemake_args += extra_args
+
+    if args.singularity:
+        if not os.path.isdir(config['container']['wrapper_dir']):
+            update_singularity_wrappers()
+        logger.info('enable singularity')
+        extra_config['use_singularity'] = True
+    
+    # extra config
+    snakemake_args += ['--config'] + ['{}={}'.format(key, val) for key, val in extra_config.items()]
+    #subprocess.check_call(snakemake_args, shell=False)
+    logger.info('run snakemake: {}'.format(quoted_string_join(snakemake_args)))
+    # run snakemake
+    os.execv(snakemake_path, snakemake_args)
+    '''
\ No newline at end of file