a b/exseek/snakefiles/bigwig_small.snakemake
1
include: 'common.snakemake'
2
3
rna_types_tbigwig = list(filter(lambda x: x not in ('rRNA', 'spikein', 'univec'), rna_types))
4
rna_types_transcriptome = list(filter(lambda x: x not in ('rRNA', 'spikein', 'univec', 'piRNA', 'miRNA'), rna_types))
5
6
rule all:
7
    input:
8
        tbigwig=expand('{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig',
9
            output_dir=output_dir, sample_id=sample_ids, rna_type=['transcriptome']),
10
        tbigwig_normalized=expand('{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bigWig',
11
            output_dir=output_dir, sample_id=sample_ids, rna_type=['transcriptome'])
12
13
14
rule merge_tbam_transcriptome:
15
    input:
16
        lambda wildcards: expand('{output_dir}/tbam/{sample_id}/{rna_type}.bam',
17
            output_dir=wildcards.output_dir, sample_id=wildcards.sample_id, rna_type=rna_types_transcriptome)
18
    output:
19
        '{output_dir}/tbam/{sample_id}/transcriptome.bam'
20
    shell:
21
        '''samtools merge -f {output} {input}
22
        '''
23
24
rule sort_tbam:
25
    input:
26
        '{output_dir}/tbam/{sample_id}/{rna_type}.bam'
27
    output:
28
        '{output_dir}/tbam_sorted/{sample_id}/{rna_type}.bam'
29
    params:
30
        temp_dir=config['temp_dir']
31
    shell:
32
        '''samtools sort -T {params.temp_dir} -o {output} {input}
33
        '''
34
35
rule tbam_to_bedgraph:
36
    input:
37
        bam='{output_dir}/tbam_sorted/{sample_id}/{rna_type}.bam',
38
        chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}'
39
    output:
40
        '{output_dir}/tbedgraph/{sample_id}.{rna_type}.bedGraph'
41
    params:
42
        temp_dir=config['temp_dir']
43
    run:
44
        shell('''bedtools genomecov -ibam {input.bam} -bg -split | LC_COLLATE=C sort -T {params.temp_dir} -k1,1 -k2,2n > {output}''')
45
        # write a zero line if the output file is empty
46
        '''
47
        with open(output[0], 'r') as f:
48
            data = f.readline()
49
        if len(data) == 0:
50
            with open(input.chrom_sizes, 'r') as f:
51
                    chrom, size = f.readline().strip().split('\t')
52
                    size = int(size)
53
            with open(output[0], 'w') as f:
54
                f.write('{0}\t{1}\t{2}\t0'.format(chrom, 0, size))
55
        '''
56
57
rule tbedgraph_to_bigwig:
58
    input:
59
        bedgraph='{output_dir}/tbedgraph/{sample_id}.{rna_type}.bedGraph',
60
        chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}'
61
    output:
62
        '{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig'
63
    shell:
64
        '''bedGraphToBigWig {input.bedgraph} {input.chrom_sizes} {output}'''
65
66
67
rule normalize_tbigwig:
68
    input:
69
        summary='{output_dir}/summary/read_counts.txt',
70
        bam='{output_dir}/tbam/{sample_id}/{rna_type}.bam',
71
        bigwig='{output_dir}/tbigwig/{sample_id}.{rna_type}.bigWig',
72
        chrom_sizes=genome_dir + '/chrom_sizes/{rna_type}'
73
    output:
74
        bigwig='{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bigWig',
75
        bedgraph=temp('{output_dir}/tbigwig_normalized/{sample_id}.{rna_type}.bedGraph')
76
    run:
77
        library_size = get_library_size_small(input.summary, wildcards.sample_id)
78
        shell(r'''bigWigToBedGraph {input.bigwig} stdout \
79
            | awk -v d="{library_size}" 'BEGIN{{OFS="\t";FS="\t"}}{{print $1,$2,$3,1000000.0*$4/d}}' > {output.bedgraph}''')
80
        shell(r'''bedGraphToBigWig {output.bedgraph} {input.chrom_sizes} {output.bigwig}''')
81
82