|
a |
|
b/exseek/snakefiles/bigwig_small.snakemake |
|
|
1 |
include: 'common.snakemake' |
|
|
2 |
|
|
|
3 |
rna_types_tbigwig = list(filter(lambda x: x not in ('rRNA', 'spikein', 'univec'), rna_types)) |
|
|
4 |
rna_types_transcriptome = list(filter(lambda x: x not in ('rRNA', 'spikein', 'univec', 'piRNA', 'miRNA'), rna_types)) |
|
|
5 |
|
|
|
6 |
rule all: |
|
|
7 |
input: |
|
|
8 |
tbigwig=expand('{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig', |
|
|
9 |
output_dir=output_dir, sample_id=sample_ids, rna_type=['transcriptome']), |
|
|
10 |
tbigwig_normalized=expand('{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bigWig', |
|
|
11 |
output_dir=output_dir, sample_id=sample_ids, rna_type=['transcriptome']) |
|
|
12 |
|
|
|
13 |
|
|
|
14 |
rule merge_tbam_transcriptome: |
|
|
15 |
input: |
|
|
16 |
lambda wildcards: expand('{output_dir}/tbam/{sample_id}/{rna_type}.bam', |
|
|
17 |
output_dir=wildcards.output_dir, sample_id=wildcards.sample_id, rna_type=rna_types_transcriptome) |
|
|
18 |
output: |
|
|
19 |
'{output_dir}/tbam/{sample_id}/transcriptome.bam' |
|
|
20 |
shell: |
|
|
21 |
'''samtools merge -f {output} {input} |
|
|
22 |
''' |
|
|
23 |
|
|
|
24 |
rule sort_tbam: |
|
|
25 |
input: |
|
|
26 |
'{output_dir}/tbam/{sample_id}/{rna_type}.bam' |
|
|
27 |
output: |
|
|
28 |
'{output_dir}/tbam_sorted/{sample_id}/{rna_type}.bam' |
|
|
29 |
params: |
|
|
30 |
temp_dir=config['temp_dir'] |
|
|
31 |
shell: |
|
|
32 |
'''samtools sort -T {params.temp_dir} -o {output} {input} |
|
|
33 |
''' |
|
|
34 |
|
|
|
35 |
rule tbam_to_bedgraph: |
|
|
36 |
input: |
|
|
37 |
bam='{output_dir}/tbam_sorted/{sample_id}/{rna_type}.bam', |
|
|
38 |
chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}' |
|
|
39 |
output: |
|
|
40 |
'{output_dir}/tbedgraph/{sample_id}.{rna_type}.bedGraph' |
|
|
41 |
params: |
|
|
42 |
temp_dir=config['temp_dir'] |
|
|
43 |
run: |
|
|
44 |
shell('''bedtools genomecov -ibam {input.bam} -bg -split | LC_COLLATE=C sort -T {params.temp_dir} -k1,1 -k2,2n > {output}''') |
|
|
45 |
# write a zero line if the output file is empty |
|
|
46 |
''' |
|
|
47 |
with open(output[0], 'r') as f: |
|
|
48 |
data = f.readline() |
|
|
49 |
if len(data) == 0: |
|
|
50 |
with open(input.chrom_sizes, 'r') as f: |
|
|
51 |
chrom, size = f.readline().strip().split('\t') |
|
|
52 |
size = int(size) |
|
|
53 |
with open(output[0], 'w') as f: |
|
|
54 |
f.write('{0}\t{1}\t{2}\t0'.format(chrom, 0, size)) |
|
|
55 |
''' |
|
|
56 |
|
|
|
57 |
rule tbedgraph_to_bigwig: |
|
|
58 |
input: |
|
|
59 |
bedgraph='{output_dir}/tbedgraph/{sample_id}.{rna_type}.bedGraph', |
|
|
60 |
chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}' |
|
|
61 |
output: |
|
|
62 |
'{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig' |
|
|
63 |
shell: |
|
|
64 |
'''bedGraphToBigWig {input.bedgraph} {input.chrom_sizes} {output}''' |
|
|
65 |
|
|
|
66 |
|
|
|
67 |
rule normalize_tbigwig: |
|
|
68 |
input: |
|
|
69 |
summary='{output_dir}/summary/read_counts.txt', |
|
|
70 |
bam='{output_dir}/tbam/{sample_id}/{rna_type}.bam', |
|
|
71 |
bigwig='{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig', |
|
|
72 |
chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}' |
|
|
73 |
output: |
|
|
74 |
bigwig='{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bigWig', |
|
|
75 |
bedgraph=temp('{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bedGraph') |
|
|
76 |
run: |
|
|
77 |
library_size = get_library_size_small(input.summary, wildcards.sample_id) |
|
|
78 |
shell(r'''bigWigToBedGraph {input.bigwig} stdout \ |
|
|
79 |
| awk -v d="{library_size}" 'BEGIN{{OFS="\t";FS="\t"}}{{print $1,$2,$3,1000000.0*$4/d}}' > {output.bedgraph}''') |
|
|
80 |
shell(r'''bedGraphToBigWig {output.bedgraph} {input.chrom_sizes} {output.bigwig}''') |
|
|
81 |
|
|
|
82 |
|