[0a8e41]: / neusomatic / python / merge_post_vcfs.py

Download this file

73 lines (60 with data), 2.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python
#-------------------------------------------------------------------------
# merge_post_vcfs.py
# Merge resolved variants and other predicted variants and output the final NeuSomatic .vcf
#-------------------------------------------------------------------------
import argparse
import traceback
import fileinput
import logging
import numpy as np
from utils import get_chromosomes_order
def merge_post_vcfs(ref, resolved_vcf, no_resolve_vcf, out_vcf,
pass_threshold, lowqual_threshold):
logger = logging.getLogger(merge_post_vcfs.__name__)
logger.info("------------------------Merge vcfs-------------------------")
chroms_order = get_chromosomes_order(reference=ref)
good_records = []
# Use fileinput to stream as if the two files were concatenated
for line in fileinput.input([no_resolve_vcf, resolved_vcf]):
if line[0] == "#":
continue
chrom, pos, _, ref, alt, score, _, info, format_, gt = line.strip().split()
good_records.append([chrom, pos, ref, alt, gt, score])
with open(out_vcf, "w") as o_f:
o_f.write("##fileformat=VCFv4.2\n")
o_f.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE\n")
for record in sorted(good_records, key=lambda x: [chroms_order[x[0]], x[1]]):
chrom, pos, ref, alt, gt, score = record
prob = np.round(1 - (10**(-float(score) / 10)), 4)
filter_ = "REJECT"
if prob >= pass_threshold:
filter_ = "PASS"
elif prob >= lowqual_threshold:
filter_ = "LowQual"
o_f.write("\t".join([chrom, pos, ".", ref, alt,
"{:.4f}".format(
float(score)), filter_, "SCORE={:.4f}".format(prob),
"GT", "0/1"]) + "\n")
if __name__ == '__main__':
FORMAT = '%(levelname)s %(asctime)-15s %(name)-20s %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
parser = argparse.ArgumentParser(description='merge pred vcfs')
parser.add_argument(
'--ref', help='reference fasta filename', required=True)
parser.add_argument('--resolved_vcf', help='resolved_vcf', required=True)
parser.add_argument('--no_resolve_vcf',
help='no resolve vcf', required=True)
parser.add_argument('--out_vcf', help='output vcf', required=True)
args = parser.parse_args()
logger.info(args)
try:
merge_post_vcfs(args.ref, args.resolved_vcf,
args.no_resolve_vcf, args.out_vcf)
except Exception as e:
logger.error(traceback.format_exc())
logger.error("Aborting!")
logger.error(
"merge_post_vcfs.py failure on arguments: {}".format(args))
raise e