|
a |
|
b/BioAid/MMBSearchTK/makeMMBSearchRef.py |
|
|
1 |
# This code was developed and authored by Jerzy Twarowski in Malkova Lab at the University of Iowa |
|
|
2 |
# Contact: jerzymateusz-twarowski@uiowa.edu, tvarovski1@gmail.com |
|
|
3 |
|
|
|
4 |
import regex as re |
|
|
5 |
|
|
|
6 |
def createMMBSearchReference(file_path_in, file_path_out):
    """Write a reduced FASTA reference with shortened chromosome headers.

    Reads the FASTA file at ``file_path_in`` line by line and appends to
    ``file_path_out`` only the records whose header line either

    * matches ``chromosome.*Primary.Assembly$`` -- the header is replaced by
      ``>chr<name>``, where ``<name>`` is the fifth whitespace-separated
      token of the header with surrounding commas stripped, or
    * ends with ``mitochondrion, complete genome`` -- the header is replaced
      by ``>chrM``.

    Every other record (header and the lines that follow it) is dropped.
    The original and the rewritten header are printed for each kept record.
    The header layout handled here is the one noted by the original author
    as "good for human genome".

    Args:
        file_path_in: Path of the FASTA file to read.
        file_path_out: Path of the file to write. It is opened in append
            mode, so existing content is preserved and new records are
            added after it.

    Raises:
        OSError: If either file cannot be opened, read, or written.
        IndexError: If a matching chromosome header has fewer than five
            whitespace-separated tokens.
    """
    # Whether lines of the record currently being read should be written.
    save = False

    with open(file_path_in, "r") as file_in:
        with open(file_path_out, "a") as file_out:
            # Iterating the file stops cleanly at EOF; readline() returns ""
            # there instead of raising, so indexing line[0] would fail.
            for line in file_in:
                if line.startswith(">"):
                    if re.search(r"chromosome.*Primary.Assembly$", line) is not None:
                        print(line)
                        chromosome = line.split()[4].strip(",")
                        # NOTE(review): only "1" and "2" are zero-padded
                        # (chr01, chr02); 3-9 stay unpadded. Kept as in the
                        # original -- confirm this is what MMBSearch expects.
                        if chromosome in ("1", "2"):
                            line = f">chr0{chromosome}\n"
                        else:
                            # Also covers X and Y (-> ">chrX", ">chrY").
                            line = f">chr{chromosome}\n"
                        print(line)
                        save = True
                    # fix for mitochondrial chr
                    elif re.search(r"mitochondrion, complete genome$", line) is not None:
                        print(line)
                        line = ">chrM\n"
                        print(line)
                        save = True
                    else:
                        # Any other header: skip this record entirely.
                        save = False

                if save:
                    file_out.write(line)