|
a |
|
b/bin/exseek |
|
|
1 |
#! /usr/bin/env python |
|
|
2 |
import argparse, sys, os, errno |
|
|
3 |
import logging |
|
|
4 |
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s') |
|
|
5 |
import yaml |
|
|
6 |
import shutil |
|
|
7 |
import shlex |
|
|
8 |
import subprocess |
|
|
9 |
import re |
|
|
10 |
import exseek |
|
|
11 |
import pkg_resources |
|
|
12 |
|
|
|
13 |
# Names of the pipeline steps known to the command-line driver. The dispatcher
# in the main program checks membership in this tuple for steps that have no
# dedicated branch and then uses 'snakefiles/<step>.snakemake'.
steps = (
    'quality_control', 'quality_control_clean', 'cutadapt', 'bigwig',
    'mapping', 'count_matrix', 'call_domains', 'normalization',
    'feature_selection', 'update_singularity_wrappers', 'build_index',
)
|
|
26 |
|
|
|
27 |
# Directory of the installed exseek package. Snakefiles, scripts, templates and
# the packaged configuration files are located relative to it.
# (A preceding os.path.dirname(exseek.__file__) assignment was a dead store:
# it was overwritten by this statement before ever being read.)
package_dir = pkg_resources.resource_filename('exseek', '/')
# Use the current directory as the working directory by default; the main
# program replaces it when --workdir is given.
root_dir = os.getcwd()
# Directories searched for configuration files; populated by the main program.
config_dirs = []
|
|
32 |
|
|
|
33 |
def quoted_string_join(strs, sep=' '):
    """Join strings for display, double-quoting the ones that need it.

    An element is wrapped in double quotes when it is empty or contains any
    whitespace (including leading or trailing whitespace), so that argument
    boundaries stay visible when the joined command line is logged.

    Parameters:
        strs: iterable of strings.
        sep: separator placed between elements (default: a single space).

    Returns:
        The joined string; an empty string for an empty iterable.
    """
    def needs_quotes(s):
        # The previous len(s.split()) > 1 test missed '' and strings such as
        # ' a' or 'b ', which then became invisible or ambiguous in the output.
        return not s or any(ch.isspace() for ch in s)

    return sep.join('"' + s + '"' if needs_quotes(s) else s for s in strs)
|
|
41 |
|
|
|
42 |
def get_config_file(filename):
    """Locate a configuration file in the configured directories.

    The module-level ``config_dirs`` list is scanned in order and the path in
    the first directory that contains ``filename`` as a regular file is
    returned. Returns None when no directory contains it.
    """
    candidates = (os.path.join(directory, filename) for directory in config_dirs)
    return next((path for path in candidates if os.path.isfile(path)), None)
|
|
46 |
|
|
|
47 |
def execute_snakemake(exec_method='execv'):
    """Assemble the snakemake command line and run it.

    Reads the module-level state prepared by the main program
    (``snakemake_args``, ``snakefile``, ``configfile``, ``extra_args``,
    ``extra_config``, ``args``, ``config``, ``config_dirs``, ``package_dir``,
    ``root_dir``, ``snakemake_path`` and ``logger``).

    Parameters:
        exec_method: 'execv' replaces the current process with snakemake (the
            call does not return on success); 'check_call' runs snakemake as a
            child process and returns once it has finished.

    Raises:
        ValueError: if ``exec_method`` is not one of the two supported values.
        subprocess.CalledProcessError: with 'check_call', when snakemake exits
            with a non-zero status.
    """
    if exec_method not in ('execv', 'check_call'):
        raise ValueError('unknown exec_method: {}'.format(exec_method))

    # Build the command from a copy of the base arguments. Appending to the
    # global list made a second call (the small RNA 'mapping' step runs
    # snakemake twice) carry the first call's --snakefile/--configfile,
    # extra arguments and --config values as well.
    command = [str(s) for s in snakemake_args]
    command += ['--snakefile', str(snakefile), '--configfile', str(configfile)]
    # extra args
    command += extra_args

    # set root_dir and bin_dir
    extra_config['package_dir'] = package_dir
    extra_config['bin_dir'] = os.path.join(package_dir, 'scripts')
    extra_config['root_dir'] = root_dir
    extra_config['dataset'] = args.dataset
    extra_config['config_dirs'] = ':'.join(config_dirs)

    if args.singularity:
        # Generate the container wrappers on first use
        if not os.path.isdir(config['container']['wrapper_dir']):
            update_singularity_wrappers()
        logger.info('enable singularity')
        extra_config['use_singularity'] = True

    # extra config
    command += ['--config'] + ['{}={}'.format(key, val) for key, val in extra_config.items()]
    logger.info('run snakemake: {}'.format(quoted_string_join(command)))
    # run snakemake
    if exec_method == 'execv':
        os.execv(snakemake_path, command)
    else:
        subprocess.check_call(command, shell=False)
|
|
76 |
|
|
|
77 |
if __name__ == '__main__':
    # Parse the command line, locate the configuration files and snakemake,
    # and prepare the base snakemake arguments used by execute_snakemake().
    parser = argparse.ArgumentParser(description='exSeek main program')

    parser.add_argument('step', type=str)
    parser.add_argument('--dataset', '-d', type=str, required=True,
        help='dataset name')
    parser.add_argument('--workdir', '-w', type=str,
        help='working directory')
    parser.add_argument('--config-dir', '-c', type=str,
        help='directory for configuration files')
    parser.add_argument('--cluster', action='store_true', help='submit to cluster')
    parser.add_argument('--singularity', action='store_true',
        help='use singularity')
    # Options that are not recognized here are passed through to snakemake
    args, extra_args = parser.parse_known_args()

    # default number of jobs: add '-j 1' unless the user already passed an
    # option starting with -j, --cores or --jobs
    is_set_jobs = any(arg.startswith(('-j', '--cores', '--jobs')) for arg in extra_args)
    if not is_set_jobs:
        extra_args += ['-j', '1']

    logger = logging.getLogger('exseek')

    snakefile = None

    if args.workdir is not None:
        root_dir = args.workdir
    logger.info('root directory: {}'.format(root_dir))

    # get_config_file() returns the first match, so the packaged config
    # directory is searched first, then <root_dir>/config, then the
    # user-supplied (or ./config) directory.
    config_dirs.append(os.path.join(package_dir, 'config'))
    config_dirs.append(os.path.join(root_dir, 'config'))
    if args.config_dir is not None:
        config_dirs.append(args.config_dir)
    elif os.path.isdir('config'):
        config_dirs.append('config')

    logger.info('read default config file')
    default_config_file = get_config_file('default_config.yaml')
    if default_config_file is None:
        # Previously open(None) failed here with an unhelpful TypeError
        raise ValueError('cannot find configuration file: default_config.yaml')
    with open(default_config_file, 'r') as f:
        # safe_load: yaml.load() without a Loader can construct arbitrary
        # objects and is rejected by recent PyYAML releases. An empty file
        # parses to None, hence the fallback to an empty dict.
        default_config = yaml.safe_load(f) or {}

    # find snakemake executable
    snakemake_path = shutil.which('snakemake')
    if snakemake_path is None:
        raise ValueError('cannot find snakemake command')

    # snakemake command
    snakemake_args = [snakemake_path, '-k', '--rerun-incomplete']
    extra_config = {}
    # check configuration file
    configfile = get_config_file('{}.yaml'.format(args.dataset))
    if configfile is None:
        raise ValueError('cannot find configuration file: {} '.format('{}.yaml'.format(args.dataset)))
    logger.info('read user config file: ' + configfile)
    with open(configfile, 'r') as f:
        user_config = yaml.safe_load(f) or {}
    # User settings override the defaults (top-level keys only; the update is
    # applied to the default_config dict itself, as before)
    config = default_config
    config.update(user_config)

    # check cluster configuration
    if args.cluster:
        cluster_config = get_config_file('cluster.yaml')
        # get_config_file() returns None when the file is missing. The old
        # check passed that None to os.path.isfile() and then read the
        # undefined option args.cluster_config, so it could never report this.
        if cluster_config is None:
            raise ValueError('cannot find cluster.yaml')

        cluster_command = config.get('cluster_command')
        if cluster_command is None:
            raise ValueError('variable cluster_command is not defined in the configuration file')
        snakemake_args += ['--cluster', cluster_command, '--cluster-config', cluster_config]
|
|
150 |
|
|
|
151 |
def update_sequential_mapping():
    """Regenerate the packaged sequential_mapping snakefile from its template.

    Runs scripts/generate_snakemake.py (under package_dir) in
    'sequential_mapping' mode with the RNA types listed in
    config['rna_types'], writing the result to
    snakefiles/sequential_mapping.snakemake under package_dir.

    Raises subprocess.CalledProcessError if the generator exits with a
    non-zero status.
    """
    output_path = os.path.join(package_dir, 'snakefiles', 'sequential_mapping.snakemake')
    logger.info('generate sequential_mapping.snakemake')
    generator = os.path.join(package_dir, 'scripts', 'generate_snakemake.py')
    template_path = os.path.join(package_dir, 'templates', 'sequential_mapping.snakemake')
    command = [generator, 'sequential_mapping']
    command += ['--rna-types', ','.join(config['rna_types'])]
    command += ['--template', template_path]
    command += ['-o', output_path]
    logger.info('run ' + ' '.join(command))
    subprocess.check_call(command, shell=False)
|
|
160 |
|
|
|
161 |
def generate_sequential_mapping_snakefile():
    """Generate the sequential mapping sub-workflow snakefile for this dataset.

    Runs scripts/generate_snakemake.py (under package_dir) in
    'sequential_mapping' mode using the sequential_mapping_subworkflow
    template, the RNA types from config['rna_types'] and the packaged
    common.snakemake, and writes the output into config['output_dir'].

    Returns the path of the generated snakefile. Raises
    subprocess.CalledProcessError if the generator exits with a non-zero
    status.
    """
    output_path = os.path.join(config['output_dir'], 'sequential_mapping.snakemake')
    logger.info('generate sequential_mapping.snakemake')
    generator = os.path.join(package_dir, 'scripts', 'generate_snakemake.py')
    template_path = os.path.join(package_dir, 'templates', 'sequential_mapping_subworkflow.snakemake')
    common_path = os.path.join(package_dir, 'snakefiles', 'common.snakemake')
    command = [generator, 'sequential_mapping']
    command += ['--rna-types', ','.join(config['rna_types'])]
    command += ['--template', template_path]
    command += ['--common-snakemake', common_path]
    command += ['-o', output_path]
    logger.info('run ' + ' '.join(command))
    subprocess.check_call(command, shell=False)
    return output_path
|
|
172 |
|
|
|
173 |
def update_singularity_wrappers():
    """Generate the container wrapper scripts for the configured backend.

    The 'container' section of the configuration selects the backend
    ('singularity', 'udocker' or 'docker'). For backend B the executable is
    taken from the 'B_path' setting, falling back to a search of PATH, and the
    image from the 'B_image' setting. scripts/make_singularity_wrappers.py is
    then run to write the wrappers into container['wrapper_dir'].

    Raises:
        ValueError: if the backend is unknown, its executable cannot be found,
            or no image is configured for it.
        KeyError: if 'wrapper_dir' is missing from the container section.
        subprocess.CalledProcessError: if the wrapper generator fails.
    """
    container = config.get('container', {})
    container_backend = container.get('backend')
    if container_backend not in ('singularity', 'udocker', 'docker'):
        raise ValueError('unknown container backend: {}'.format(container_backend))

    # The three backends differ only in the names of their settings
    backend_executable = container.get(container_backend + '_path')
    if backend_executable is None:
        backend_executable = shutil.which(container_backend)
    if backend_executable is None:
        raise ValueError('cannot find {} executable'.format(container_backend))

    container_image = container.get(container_backend + '_image')
    if container_image is None:
        # Without this check a None argument reached subprocess and failed
        # with a TypeError that did not name the missing setting
        raise ValueError('{}_image is not defined in the container configuration'.format(container_backend))

    logger.info('generate container wrappers')
    subprocess.check_call(['python', os.path.join(package_dir, 'scripts', 'make_singularity_wrappers.py'),
        '--image', container_image,
        '--list-file', os.path.join(package_dir, 'singularity', 'exports.txt'),
        '--backend', container_backend,
        '--backend-executable', backend_executable,
        '-o', container['wrapper_dir']
    ], shell=False)
|
|
206 |
|
|
|
207 |
if __name__ == '__main__':
    # Select the snakefile for the requested step, then hand over to snakemake.
    snakefile_dir = os.path.join(package_dir, 'snakefiles')
    # Steps with separate paired-end (_pe) and single-end (_se) snakefiles
    paired_end_steps = ('quality_control', 'cutadapt', 'quality_control_clean')
    # Steps with separate small RNA (_small) and long RNA (_long) snakefiles
    library_type_steps = ('create_index', 'count_matrix', 'bigwig')
    # Steps available for small RNA-seq data only: step name -> snakefile name
    small_rna_only_steps = {
        'combine_domains': 'combine_domains_with_small',
        'call_domains': 'call_domains',
    }

    step = args.step
    if step in paired_end_steps:
        suffix = 'pe' if config['paired_end'] else 'se'
        snakefile = os.path.join(snakefile_dir, '{}_{}.snakemake'.format(step, suffix))
    elif step in library_type_steps:
        suffix = 'small' if config['small_rna'] else 'long'
        snakefile = os.path.join(snakefile_dir, '{}_{}.snakemake'.format(step, suffix))
    elif step in small_rna_only_steps:
        if not config['small_rna']:
            raise ValueError('{} can only be applied to small RNA-seq data'.format(step))
        snakefile = os.path.join(snakefile_dir, small_rna_only_steps[step] + '.snakemake')
    elif step == 'sequential_mapping':
        snakefile = generate_sequential_mapping_snakefile()
    elif step == 'mapping':
        if config['small_rna']:
            # first run sequential_mapping as a child process so that control
            # returns here, then continue with the small RNA mapping snakefile
            snakefile = generate_sequential_mapping_snakefile()
            execute_snakemake(exec_method='check_call')
            snakefile = os.path.join(snakefile_dir, 'mapping_small.snakemake')
        else:
            suffix = 'pe' if config['paired_end'] else 'se'
            snakefile = os.path.join(snakefile_dir, 'mapping_long_{}.snakemake'.format(suffix))
    elif step == 'update_sequential_mapping':
        if config['small_rna']:
            update_sequential_mapping()
        sys.exit(0)
    elif step == 'update_singularity_wrappers':
        # NOTE(review): the former guard `args.singularity is None` could never
        # be true because --singularity is a store_true flag (False when not
        # given), so this step has always run without it. Confirm whether the
        # flag should really be required before enforcing it.
        update_singularity_wrappers()
        sys.exit(0)
    elif step in steps:
        # Remaining known steps map directly to '<step>.snakemake'
        snakefile = os.path.join(snakefile_dir, step + '.snakemake')
    else:
        raise ValueError('unknown step: ' + args.step)
    # Replaces the current process with snakemake (default exec_method)
    execute_snakemake()