Diff of /bin/exseek [000000] .. [4c33d4]

Switch to unified view

a b/bin/exseek
1
#! /usr/bin/env python
2
import argparse, sys, os, errno
3
import logging
4
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s')
5
import yaml
6
import shutil
7
import shlex
8
import subprocess
9
import re
10
import exseek
11
import pkg_resources
12
13
# Step names recognised by the launcher. A step listed here that has no
# dedicated branch in the dispatcher runs snakefiles/<step>.snakemake.
steps = (
    'quality_control',
    'quality_control_clean',
    'cutadapt',
    'bigwig',
    'mapping',
    'count_matrix',
    'call_domains',
    'normalization',
    'feature_selection',
    'update_singularity_wrappers',
    'build_index',
)
26
27
# directory of the installed exseek package; snakefiles, scripts, templates
# and the default config are looked up below it
package_dir = pkg_resources.resource_filename('exseek', '/')
# use current directory as working directory by default
root_dir = os.getcwd()
# directories searched by get_config_file(); filled in by the main program
config_dirs = []
32
33
def quoted_string_join(strs, sep=' '):
    """Join items into one display string, double-quoting ambiguous ones.

    An item is wrapped in double quotes when it is empty or contains any
    whitespace, so that item boundaries stay visible in the joined string.
    Double quotes inside a quoted item are backslash-escaped. Items that are
    not strings are converted with str().

    Parameters:
        strs: iterable of items to join
        sep: separator placed between items (default: a single space)

    Returns:
        the joined string ('' for an empty iterable)
    """
    quoted = []
    for s in strs:
        s = str(s)
        # an empty item would otherwise disappear from the joined string
        if s == '' or any(ch.isspace() for ch in s):
            quoted.append('"' + s.replace('"', '\\"') + '"')
        else:
            quoted.append(s)
    return sep.join(quoted)
41
42
def get_config_file(filename, search_dirs=None):
    """Find a configuration file in the configuration search path.

    Parameters:
        filename: name of the file to look for
        search_dirs: directories to search, in order; defaults to the
            module-level config_dirs list

    Returns:
        the path of the first existing regular file named `filename`,
        or None if no directory contains one
    """
    if search_dirs is None:
        search_dirs = config_dirs
    for config_dir in search_dirs:
        path = os.path.join(config_dir, filename)
        if os.path.isfile(path):
            return path
    return None
46
47
def execute_snakemake(exec_method='execv'):
    """Build the snakemake command line from the launcher's globals and run it.

    Reads the module-level snakemake_args (base command), snakefile,
    configfile, extra_args, extra_config, args, config and snakemake_path.
    extra_config is updated in place with the launcher-provided settings.

    Parameters:
        exec_method: 'execv' replaces the current process with snakemake
            (os.execv does not return on success); 'check_call' runs
            snakemake as a child process and returns after it succeeds.

    Raises:
        ValueError: if exec_method is neither 'execv' nor 'check_call'
        subprocess.CalledProcessError: with 'check_call', if snakemake exits
            with a non-zero status
    """
    if exec_method not in ('execv', 'check_call'):
        raise ValueError('unknown exec_method: {}'.format(exec_method))

    # Build the command in a local list and leave the global base command
    # untouched: this function is called twice for small RNA mapping, and
    # appending to the global would repeat --snakefile/--configfile/--config
    # from the first call in the second command line.
    command = [str(s) for s in snakemake_args]
    command += ['--snakefile', str(snakefile), '--configfile', str(configfile)]
    # set root_dir and bin_dir
    extra_config['package_dir'] = package_dir
    extra_config['bin_dir'] = os.path.join(package_dir, 'scripts')
    extra_config['root_dir'] = root_dir
    extra_config['dataset'] = args.dataset
    extra_config['config_dirs'] = ':'.join(config_dirs)
    # extra args
    command += extra_args

    if args.singularity:
        # generate the wrappers on first use
        if not os.path.isdir(config['container']['wrapper_dir']):
            update_singularity_wrappers()
        logger.info('enable singularity')
        extra_config['use_singularity'] = True

    # extra config
    command += ['--config'] + ['{}={}'.format(key, val) for key, val in extra_config.items()]
    logger.info('run snakemake: {}'.format(quoted_string_join(command)))
    # run snakemake
    if exec_method == 'execv':
        os.execv(snakemake_path, command)
    else:
        subprocess.check_call(command, shell=False)
76
77
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='exSeek main program')

    parser.add_argument('step', type=str)
    parser.add_argument('--dataset', '-d', type=str, required=True,
        help='dataset name')
    parser.add_argument('--workdir', '-w', type=str,
        help='working directory')
    parser.add_argument('--config-dir', '-c', type=str,
        help='directory for configuration files')
    parser.add_argument('--cluster', action='store_true', help='submit to cluster')
    parser.add_argument('--singularity', action='store_true',
        help='use singularity')
    # options not recognised here are passed on to snakemake
    args, extra_args = parser.parse_known_args()
    # default number of jobs: add "-j 1" unless a job/core count was given.
    # The whole option is matched so that other options sharing the prefix
    # (e.g. --jobscript, --jobname) are not mistaken for --jobs.
    jobs_option = re.compile(r'(?:-j|--jobs|--cores)(?:=.*|[0-9]*)$')
    is_set_jobs = any(jobs_option.match(arg) is not None for arg in extra_args)
    if not is_set_jobs:
        extra_args += ['-j', '1']

    logger = logging.getLogger('exseek')

    snakefile = None

    if args.workdir is not None:
        root_dir = args.workdir
    logger.info('root directory: {}'.format(root_dir))

    # search order used by get_config_file(): first match wins
    config_dirs.append(os.path.join(package_dir, 'config'))
    config_dirs.append(os.path.join(root_dir, 'config'))
    if args.config_dir is not None:
        config_dirs.append(args.config_dir)
    else:
        if os.path.isdir('config'):
            config_dirs.append('config')

    logger.info('read default config file')
    default_config_file = get_config_file('default_config.yaml')
    if default_config_file is None:
        raise ValueError('cannot find configuration file: default_config.yaml')
    with open(default_config_file, 'r') as f:
        # safe_load builds plain YAML types only; yaml.load() without an
        # explicit Loader is deprecated and rejected by recent PyYAML.
        # An empty file loads as None, hence the fallback to {}.
        default_config = yaml.safe_load(f) or {}

    # find snakemake executable
    snakemake_path = shutil.which('snakemake')
    if snakemake_path is None:
        raise ValueError('cannot find snakemake command')

    # snakemake command
    snakemake_args = [snakemake_path, '-k', '--rerun-incomplete']
    extra_config = {}
    # check configuration file
    configfile = get_config_file('{}.yaml'.format(args.dataset))
    if configfile is None:
        raise ValueError('cannot find configuration file: {} '.format('{}.yaml'.format(args.dataset)))
    logger.info('read user config file: ' + configfile)
    with open(configfile, 'r') as f:
        user_config = yaml.safe_load(f) or {}
    # user settings replace default settings key by key (top-level keys only)
    config = default_config
    config.update(user_config)
    # check cluster configuration
    if args.cluster:
        # get_config_file() returns None when cluster.yaml is in no config directory
        cluster_config = get_config_file('cluster.yaml')
        if cluster_config is None:
            raise ValueError('cannot find cluster.yaml')

        cluster_command = config.get('cluster_command')
        if cluster_command is None:
            raise ValueError('variable cluster_command is not defined in the configuration file')
        snakemake_args += ['--cluster', cluster_command, '--cluster-config', cluster_config]

    def update_sequential_mapping():
        """Regenerate the packaged sequential_mapping snakefile.

        Runs scripts/generate_snakemake.py with the configured rna_types and
        templates/sequential_mapping.snakemake, with
        snakefiles/sequential_mapping.snakemake in the package directory as
        the output path.

        Raises:
            subprocess.CalledProcessError: if the generator exits with a
                non-zero status
        """
        snakefile = os.path.join(package_dir, 'snakefiles', 'sequential_mapping.snakemake')
        logger.info('generate sequential_mapping.snakemake')
        update_command = [os.path.join(package_dir, 'scripts', 'generate_snakemake.py'), 'sequential_mapping',
                '--rna-types', ','.join(config['rna_types']),
                '--template', os.path.join(package_dir, 'templates', 'sequential_mapping.snakemake'),
                '-o', snakefile]
        # quote arguments containing spaces so the logged command stays readable
        logger.info('run ' + quoted_string_join(update_command))
        subprocess.check_call(update_command, shell=False)

    def generate_sequential_mapping_snakefile():
        """Generate the sequential_mapping snakefile for the current dataset.

        Runs scripts/generate_snakemake.py with the configured rna_types,
        templates/sequential_mapping_subworkflow.snakemake and
        snakefiles/common.snakemake, with sequential_mapping.snakemake under
        config['output_dir'] as the output path.

        Returns:
            the output path passed to the generator

        Raises:
            subprocess.CalledProcessError: if the generator exits with a
                non-zero status
        """
        snakefile = os.path.join(config['output_dir'], 'sequential_mapping.snakemake')
        logger.info('generate sequential_mapping.snakemake')
        update_command = [os.path.join(package_dir, 'scripts', 'generate_snakemake.py'), 'sequential_mapping',
                '--rna-types', ','.join(config['rna_types']),
                '--template', os.path.join(package_dir, 'templates', 'sequential_mapping_subworkflow.snakemake'),
                '--common-snakemake', os.path.join(package_dir, 'snakefiles', 'common.snakemake'),
                '-o', snakefile]
        # quote arguments containing spaces so the logged command stays readable
        logger.info('run ' + quoted_string_join(update_command))
        subprocess.check_call(update_command, shell=False)
        return snakefile

    def update_singularity_wrappers():
        """Run scripts/make_singularity_wrappers.py for the configured container backend.

        Reads the 'container' section of the configuration:
        'backend' ('singularity', 'udocker' or 'docker'), the optional
        '<backend>_path' (falls back to searching PATH), '<backend>_image'
        and 'wrapper_dir'.

        Raises:
            ValueError: if the backend is unknown, its executable cannot be
                found, or no image is configured for it
            KeyError: if container.wrapper_dir is not configured
            subprocess.CalledProcessError: if the wrapper script exits with
                a non-zero status
        """
        container_config = config.get('container', {})
        container_backend = container_config.get('backend')
        if container_backend not in ('singularity', 'udocker', 'docker'):
            raise ValueError('unknown container backend: {}'.format(container_backend))
        # every backend uses the same key scheme: <backend>_path and <backend>_image
        backend_executable = container_config.get(container_backend + '_path')
        if backend_executable is None:
            backend_executable = shutil.which(container_backend)
        if backend_executable is None:
            raise ValueError('cannot find {} executable'.format(container_backend))
        container_image = container_config.get(container_backend + '_image')
        if container_image is None:
            # fail here with a clear message instead of passing None to subprocess
            raise ValueError('variable container.{}_image is not defined in the configuration file'.format(
                container_backend))
        logger.info('generate container wrappers')
        subprocess.check_call(['python', os.path.join(package_dir, 'scripts', 'make_singularity_wrappers.py'),
            '--image', container_image,
            '--list-file', os.path.join(package_dir, 'singularity', 'exports.txt'),
            '--backend', container_backend,
            '--backend-executable', backend_executable,
            '-o', container_config['wrapper_dir']
        ], shell=False)

    # select the snakefile for the requested step
    snakefile_dir = os.path.join(package_dir, 'snakefiles')
    # steps with separate paired-end (_pe) and single-end (_se) snakefiles
    paired_end_steps = ('quality_control', 'cutadapt', 'quality_control_clean')
    # steps with separate small RNA (_small) and long RNA (_long) snakefiles
    small_rna_steps = ('create_index', 'count_matrix', 'bigwig')

    if args.step in paired_end_steps:
        suffix = '_pe' if config['paired_end'] else '_se'
        snakefile = os.path.join(snakefile_dir, args.step + suffix + '.snakemake')
    elif args.step in small_rna_steps:
        suffix = '_small' if config['small_rna'] else '_long'
        snakefile = os.path.join(snakefile_dir, args.step + suffix + '.snakemake')
    elif args.step == 'sequential_mapping':
        snakefile = generate_sequential_mapping_snakefile()
    elif args.step == 'mapping':
        if config['small_rna']:
            # first run sequential_mapping as a child process, so that control
            # returns here for the mapping_small run
            snakefile = generate_sequential_mapping_snakefile()
            execute_snakemake(exec_method='check_call')
            snakefile = os.path.join(snakefile_dir, 'mapping_small.snakemake')
        elif config['paired_end']:
            snakefile = os.path.join(snakefile_dir, 'mapping_long_pe.snakemake')
        else:
            snakefile = os.path.join(snakefile_dir, 'mapping_long_se.snakemake')
    elif args.step == 'combine_domains':
        if not config['small_rna']:
            raise ValueError('combine_domains can only be applied to small RNA-seq data')
        snakefile = os.path.join(snakefile_dir, 'combine_domains_with_small.snakemake')
    elif args.step == 'update_sequential_mapping':
        if config['small_rna']:
            update_sequential_mapping()
        sys.exit(0)
    elif args.step == 'update_singularity_wrappers':
        # --singularity is a store_true flag: it is False, never None, when omitted
        if not args.singularity:
            raise ValueError('argument --singularity is required for step: update_singularity_wrappers')
        update_singularity_wrappers()
        sys.exit(0)
    elif args.step == 'call_domains':
        if not config['small_rna']:
            raise ValueError('call_domains can only be applied to small RNA-seq data')
        snakefile = os.path.join(snakefile_dir, 'call_domains.snakemake')
    elif args.step in steps:
        # remaining known steps have a single snakefile named after the step
        snakefile = os.path.join(snakefile_dir, args.step + '.snakemake')
    else:
        raise ValueError('unknown step: ' + args.step)
    execute_snakemake()