a b/singlecellmultiomics/methylation/methylationtab_to_bed.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
import argparse
4
import pyBigWig
5
from multiprocessing import Pool
6
from glob import glob
7
import os
8
import gzip
9
import numpy as np
10
import pandas as pd
11
import matplotlib.pyplot as plt
12
from singlecellmultiomics.methylation import sort_methylation_tabfile, methylation_tabfile_to_bed
13
from singlecellmultiomics.utils import create_fasta_dict_file
14
15
16
if __name__ == '__main__':
17
    argparser = argparse.ArgumentParser(
18
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
19
        description='Convert methylation calls from TAPS-tabulator to a methylation (big)bed file')
20
21
    argparser.add_argument('tabulated_file', type=str)
22
    argparser.add_argument(
23
        '-ref',
24
        type=str,
25
        help='path to reference fasta file, only required for bigbed files')
26
27
    argparser.add_argument(
28
        '--force-sort',
29
        action='store_true',
30
        help='Perform sort even when .sorted.gz already exists')
31
32
    argparser.add_argument(
33
        '-o',
34
        type=str,
35
        help='Output path, ends in .bed to get a bed file and .bb to get a compressed bigbed file')
36
37
    args = argparser.parse_args()
38
39
    tabulated_file = args.tabulated_file
40
    # First we need to sort the callfile
41
    if tabulated_file.endswith('.sorted.gz') and not args.force_sort:
42
        print('Input is sorted')
43
    else:
44
        if  args.force_sort:
45
            print("Performing sort")
46
        else:
47
            print("Input seems not to be sorted. Performing sort")
48
        pathout = tabulated_file + '.sorted.gz'
49
        sort_methylation_tabfile(tabulated_file, pathout)
50
        tabulated_file = pathout
51
52
    if args.o.endswith('.bb'):
53
        print('exporting to big-bed file')
54
        if args.ref is None:
55
            raise ValueError('-ref is required for bigbed output')
56
57
        dict_path = create_fasta_dict_file(args.ref)
58
        # We need to write the bed file temporarily
59
        output_path = args.o.replace('.bb','.bed')
60
        methylation_tabfile_to_bed(tabulated_file, output_path, invert_strand=True)
61
62
        # Perform bigbed conversion:
63
        os.system(f'bedToBigBed {output_path} {dict_path} {args.o} -type=bed8+3')
64
        if os.path.exists(args.o):
65
            os.remove(output_path)
66
            print('Completed')
67
        else:
68
            print('bedToBigBed failed, is bedToBigBed installed ?')
69
    else:
70
        print('exporting to bed file')
71
        output_path = args.o
72
        methylation_tabfile_to_bed(tabulated_file, args.o,  invert_strand=True)