Switch to unified view

a b/BioAid/MMBSearchTK/makeMMBSearchRef.py
1
# This code was developed and authored by Jerzy Twarowski in Malkova Lab at the University of Iowa 
2
# Contact: jerzymateusz-twarowski@uiowa.edu, tvarovski1@gmail.com
3
4
import regex as re
5
6
def _renameFastaHeader(line):
  """Return the short MMBSearch header for a FASTA header line, or None.

  A primary-assembly chromosome header becomes ">chr<name>" and the
  mitochondrial genome header becomes ">chrM", each terminated by a newline.
  Any other header returns None, meaning the record should be dropped.
  """
  if re.search("chromosome.*Primary.Assembly$", line) is not None:
    # The chromosome name is taken from the fifth whitespace-separated word,
    # with any comma stripped (e.g. "1," -> "1").
    # NOTE(review): assumes an NCBI-style header such as
    # ">NC_000001.11 Homo sapiens chromosome 1, GRCh38.p13 Primary Assembly";
    # a matching header with fewer than five words raises IndexError.
    chromosome = line.split()[4].strip(",")
    # Only "1" and "2" are zero-padded, exactly as before.
    # NOTE(review): presumably a downstream naming requirement -- confirm.
    if chromosome in ("1", "2"):
      return f">chr0{chromosome}\n"
    return f">chr{chromosome}\n"
  # The mitochondrial record carries no "chromosome" word; match it separately.
  if re.search("mitochondrion, complete genome$", line) is not None:
    return ">chrM\n"
  return None


def createMMBSearchReference(file_path_in, file_path_out):
  """Filter a genome FASTA down to primary chromosomes with short names.

  Reads file_path_in line by line. Records whose header matches a
  primary-assembly chromosome or the mitochondrial genome are kept, with the
  header rewritten (">chr01", ">chr3", ">chrX", ">chrM", ...); every other
  record is dropped together with its sequence lines. Sequence lines that
  appear before the first header are dropped as well.

  Kept lines are APPENDED to file_path_out, so calling this twice on the same
  output path duplicates the content. The output file is only created once
  there is something to write. Each kept header is printed before and after
  renaming.

  Args:
    file_path_in: path of the FASTA file to read.
    file_path_out: path of the FASTA file to append to.

  Raises:
    OSError: if either file cannot be opened, read, or written.
    UnicodeDecodeError: if the input cannot be decoded as text. (This used to
      be swallowed by a bare except, silently truncating the output.)
  """
  #good for human genome
  file_out = None
  save = False
  try:
    with open(file_path_in, "r") as file_in:
      # Iterating the file stops cleanly at EOF. readline() signals EOF by
      # returning "", not by raising, so the old loop crashed on line[0].
      for line in file_in:
        if line.startswith(">"):
          new_header = _renameFastaHeader(line)
          # The decision made at a header applies to all following sequence
          # lines until the next header.
          save = new_header is not None
          if save:
            print(line)
            line = new_header
            print(line)

        if save:
          # Open the output lazily and only once: nothing is created when no
          # record matches, and the file is not reopened for every line.
          if file_out is None:
            file_out = open(file_path_out, "a")
          file_out.write(line)
  finally:
    if file_out is not None:
      file_out.close()