# Copyright © 2023 Institute for AI Industry Research (AIR), Tsinghua University.
# License: GNU GPLv3. [See details in LICENSE]
import argparse
import os
import glob
import Bio.PDB
import ray
import random
import json
import mmap
import time
import sys
import traceback
from ray.util.queue import Queue
import numpy as np
sys.path.append(".")
from utils.misc import execute
from utils.ray_tools import ProgressBar
from tqdm import tqdm
import pathlib
import subprocess
import shutil
class JackhmmerRunner:
def __init__(self, database_dir, task_ids, seq_dir, output_dir, n_thread):
self.N_CPU_PER_THREAD = 1
self.n_thread = n_thread
self.database_dir = database_dir
self.task_ids = task_ids
self.seq_dir = seq_dir
self.output_dir = output_dir
if output_dir.endswith("/"):
output_dir = output_dir[:-1]
if not os.path.exists(output_dir):
os.mkdir(output_dir)
self.sto_dir = output_dir + "_sto"
if not os.path.exists(self.sto_dir):
os.mkdir(self.sto_dir)
self.a2m_dir = output_dir + "_a2m"
if not os.path.exists(self.a2m_dir):
os.mkdir(self.a2m_dir)
def split_list(_list, n):
chunk_size = (len(_list) - 1) // n + 1
chunks = [_list[i * chunk_size : (i + 1) * chunk_size] for i in range(n)]
return chunks
@ray.remote(num_cpus=1)
def process_jobs(self, id, jobs_queue, actor):
print("start process", id)
while not jobs_queue.empty():
job = jobs_queue.get()
try:
self.execute_one_job(job)
except:
print(f"failed: {job}")
traceback.print_exception(*sys.exc_info())
try:
actor.update.remote(1)
except:
pass
return 1
def execute_one_job(self, job):
seq_file_Path = pathlib.Path(self.seq_dir) / job[:4] / (job + ".fasta")
output_file_Path = pathlib.Path(self.sto_dir) / (job + ".sto")
execute(
f"jackhmmer"
f" --cpu {self.N_CPU_PER_THREAD}"
f" -A {output_file_Path}"
f" -o /dev/null"
f" -E 0.001"
f" -N 3"
f" {str(seq_file_Path)}"
f" {self.database_dir}"
)
# with open(output_file_Path,"w") as f:
# f.write("")
# # sleep 1 s
# time.sleep(1)
return 1
def change_sto_to_fasta(self):
sto_files = glob.glob(self.sto_dir + "/*.sto")
for sto_path in tqdm(sto_files):
pdb_id = sto_path.split("/")[-1].split(".")[0]
# print(pdb_id)
a2m_path = self.a2m_dir + f"/{pdb_id}.a2m"
execute(
f"esl-reformat --informat stockholm"
f" -o {str(a2m_path)} a2m"
f" {str(sto_path)}"
)
fasta_path = self.output_dir + f"/{pdb_id}.fasta"
output = ""
with open(a2m_path) as f:
for line in f:
line = line.strip()
if line[0] != ">":
output += line
else:
output += "\n" + line + "\n"
output = output.strip()
output = output.split("\n")[:-2]
output = "\n".join(output)
with open(fasta_path, "w") as f:
f.write(output)
def run_jackhmmer(self):
all_jobs = []
with open(self.task_ids, "r") as f:
data = f.readlines()
for line in data:
job = line.strip()
job = job[:4].upper()
all_jobs.append(job)
print("all jobs:", len(all_jobs))
uncompleted_jobs = all_jobs
# completed?
# uncompleted_jobs=[]
# for job in all_jobs:
# if (not is_complete(job)):
# uncompleted_jobs.append(job)
# print("uncompleted jobs:",len(uncompleted_jobs))
ray.init()
job_queue = Queue()
for job in tqdm(uncompleted_jobs):
job_queue.put(job)
print("job queue size:", job_queue.qsize())
pb = ProgressBar(len(all_jobs))
actor = pb.actor
print("actor:", actor)
job_id_list = []
self.n_thread = min(self.n_thread, len(uncompleted_jobs))
for i in range(self.n_thread):
job_id_list.append(self.process_jobs.remote(self, i, job_queue, actor))
pb.print_until_done()
result = ray.get(job_id_list)
print("Run homo search done!")
ray.shutdown()
# remove tmp folders
shutil.rmtree(self.a2m_dir)
shutil.rmtree(self.sto_dir)
# change sto to fasta
self.change_sto_to_fasta()
class LigandPocketExtractor:
def __init__(self, id_file, homoaug_dir, n_thread):
self.id_file = id_file
self.homoaug_dir = homoaug_dir
self.n_thread = n_thread
self.read_ligand_name_chain_name()
def read_ligand_name_chain_name(self):
# read ligand_name
ligand_name = {}
chain_name = {}
with open(self.id_file, "r") as f:
for line in f.readlines():
line = line.strip()
id = line[:4].upper()
ligand_name[id] = line.split("_")[-1]
chain_name[id] = line.split("_")[1]
# print("ligand_name:",ligand_name)
self.ligand_name = ligand_name
self.chain_name = chain_name
def execute_one_job(self, job):
id = job
# print ('############################')
# print (id)
# read original pdb
pdb_file = self.homoaug_dir + "/" + id + "/" + id + "_protein.pdb"
if not os.path.exists(pdb_file):
print("no pdb file for id:", id)
return 1
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
structure = pdb_parser.get_structure(id, pdb_file)
model = structure[0]
chain = model[self.chain_name[id]]
# get pocket_chain to a new pdb
pocket_chain = Bio.PDB.Chain.Chain(self.chain_name[id])
for residue in chain:
if residue.id[0] == " ":
pocket_chain.add(residue)
io = Bio.PDB.PDBIO()
io.set_structure(pocket_chain)
io.save(self.homoaug_dir + "/" + id + "/" + id + "_pocket_chain.pdb")
# save ligand to a new pdb
ligand_chain = Bio.PDB.Chain.Chain(self.chain_name[id])
ligand_found = False
for residue in chain:
if residue.resname == self.ligand_name[id]:
ligand_chain.add(residue)
ligand_found = True
if ligand_found:
io = Bio.PDB.PDBIO()
io.set_structure(ligand_chain)
io.save(self.homoaug_dir + "/" + id + "/" + id + "_ligand.pdb")
else:
print("ligand not found:", id)
return 1
# remove ligand in the id.pocket.pdb
pdb_file = self.homoaug_dir + "/" + id + "/" + id + "_pocket.pdb"
pdb_parser = Bio.PDB.PDBParser(QUIET=True)
structure = pdb_parser.get_structure(id, pdb_file)
model = structure[0]
chain = model["R"]
# get pocket to a new pdb
pocket = Bio.PDB.Chain.Chain("A")
for residue in chain:
if residue.id[0] == " ":
pocket.add(residue)
# write pocket pdb
io = Bio.PDB.PDBIO()
io.set_structure(pocket)
io.save(self.homoaug_dir + "/" + id + "/" + id + "_pocket.pdb")
@ray.remote(num_cpus=1)
def process_jobs(self, id, jobs_queue, actor):
print("start process", id)
while not jobs_queue.empty():
job = jobs_queue.get()
try:
self.execute_one_job(job)
except:
print(f"failed: {job}")
traceback.print_exception(*sys.exc_info())
try:
actor.update.remote(1)
except:
pass
return 1
def run(self):
all_jobs = glob.glob(self.homoaug_dir + "/*")
all_jobs = [x.split("/")[-1] for x in all_jobs]
uncompleted_jobs = all_jobs
job_queue = Queue()
for job in tqdm(uncompleted_jobs):
job_queue.put(job)
print("job queue size:", job_queue.qsize())
pb = ProgressBar(len(all_jobs))
actor = pb.actor
job_id_list = []
self.n_thread = min(self.n_thread, len(uncompleted_jobs))
for i in range(self.n_thread):
job_id_list.append(self.process_jobs.remote(self, i, job_queue, actor))
pb.print_until_done()
result = ray.get(job_id_list)
ray.shutdown()
print("Done!")
class PocketPositionExtractor:
def __init__(self, homoaug_dir, n_thread):
self.homoaug_dir = homoaug_dir
self.n_thread = n_thread
self.aa_3_to_1 = {
"CYS": "C",
"ASP": "D",
"SER": "S",
"GLN": "Q",
"LYS": "K",
"ILE": "I",
"PRO": "P",
"THR": "T",
"PHE": "F",
"ASN": "N",
"GLY": "G",
"HIS": "H",
"LEU": "L",
"ARG": "R",
"TRP": "W",
"ALA": "A",
"VAL": "V",
"GLU": "E",
"TYR": "Y",
"MET": "M",
"MSE": "M",
"CME": "C",
"CSO": "C",
"UNK": "X",
}
def execute_one_job(self, job):
id = job
# print ('############################')
# print (id)
# read pocket_pdb
pocket_pdb_file = self.homoaug_dir + "/" + id + "/" + id + "_pocket.pdb"
pocket_pdb_structure = Bio.PDB.PDBParser().get_structure(id, pocket_pdb_file)
model = pocket_pdb_structure[0]
for chain in model:
pocket_chain_id = chain.id
break
chain = model[pocket_chain_id]
# get the pocket atom coordinates
pocket_atom_coordinates = set()
for residue in chain:
if residue.id[0] != " ":
continue
for atom in residue:
pocket_atom_coordinates.add(tuple(atom.get_coord()))
# print("pocket_atom_coordinates:",pocket_atom_coordinates)
sequence = ""
pocket_chain_pdb_file = (
self.homoaug_dir + "/" + id + "/" + id + "_pocket_chain.pdb"
)
# get the position of the pocket residues in the pocket chain
pocket_chain_structure = Bio.PDB.PDBParser().get_structure(
id, pocket_chain_pdb_file
)
model = pocket_chain_structure[0]
for chain in model:
pocket_chain_id = chain.id
break
chain_num = len(list(model.get_chains()))
if chain_num > 1:
print("error: more than 1 chain in pocket_pdb_file:", id)
return 1
chain = model[pocket_chain_id]
for residue in chain:
if residue.id[0] != " ":
continue
in_pocket = False
for atom in residue:
if tuple(atom.get_coord()) in pocket_atom_coordinates:
in_pocket = True
break
if in_pocket:
sequence += self.aa_3_to_1[residue.resname]
else:
sequence += "-"
# save
sequence_position_file = (
self.homoaug_dir + "/" + id + "/" + id + "_pocket_position.txt"
)
with open(sequence_position_file, "w") as f:
f.write(sequence)
@ray.remote(num_cpus=1)
def process_jobs(self, id, jobs_queue, actor):
print("start process", id)
while not jobs_queue.empty():
job = jobs_queue.get()
try:
self.execute_one_job(job)
except:
print(f"failed: {job}")
traceback.print_exception(*sys.exc_info())
try:
actor.update.remote(1)
except:
pass
return 1
def run(self):
all_jobs = []
for id in os.listdir(self.homoaug_dir):
if os.path.isfile(self.homoaug_dir + "/" + id + "/" + id + "_ligand.pdb"):
if os.path.isfile(
self.homoaug_dir + "/" + id + "/" + id + "_pocket.pdb"
):
all_jobs.append(id)
uncompleted_jobs = all_jobs
job_queue = Queue()
for job in tqdm(uncompleted_jobs):
job_queue.put(job)
print("job queue size:", job_queue.qsize())
pb = ProgressBar(len(all_jobs))
actor = pb.actor
job_id_list = []
self.n_thread = min(self.n_thread, len(uncompleted_jobs))
for i in range(self.n_thread):
job_id_list.append(self.process_jobs.remote(self, i, job_queue, actor))
pb.print_until_done()
result = ray.get(job_id_list)
ray.shutdown()
print("Done!")
class TMalignRunner:
def __init__(
self,
max_extend_num,
homoaug_dir,
MSA_dir,
AF2DB_dir,
TMscore_threshold,
Match_rate_threshold,
n_thread,
):
self.n_thread = n_thread
self.max_extend_num = max_extend_num
self.homoaug_dir = homoaug_dir
self.MSA_dir = MSA_dir
self.AF2DB_dir = AF2DB_dir
self.TMscore_threshold = TMscore_threshold
self.Match_rate_threshold = Match_rate_threshold
def _remove_gap_of_primary_sequence(self, primary_sequence, candidate_sequence):
assert len(primary_sequence) == len(candidate_sequence)
primary_sequence_without_gap = ""
candidate_sequence_without_gap = ""
for i in range(len(primary_sequence)):
if primary_sequence[i] != "-":
primary_sequence_without_gap += primary_sequence[i]
candidate_sequence_without_gap += candidate_sequence[i]
return primary_sequence_without_gap, candidate_sequence_without_gap
def _calc_match_rate(self, pocket_position, Aligned_seq):
total_cnt = 0
match_cnt = 0
for i in range(len(Aligned_seq)):
if pocket_position[i] != "-":
total_cnt += 1
if pocket_position[i] == Aligned_seq[i]:
match_cnt += 1
return match_cnt / total_cnt
def _get_rotate_matrix(self, rotate_matrix_file):
with open(rotate_matrix_file, "r") as f:
data = f.readlines()
u = []
t = []
for i in range(2, 5):
line = data[i].split(" ")
line_float = [float(x) for x in line if x != ""]
t.append(line_float[1])
u.append(line_float[2:])
u = np.array(u)
t = np.array(t)
return u, t
def _read_ligand_pdb(self, ligand_pdb_file):
parser = Bio.PDB.PDBParser()
structure = parser.get_structure("ligand", ligand_pdb_file)
ligand_coords = []
for model in structure:
for chain in model:
for residue in chain:
for atom in residue:
ligand_coords.append(atom.get_coord())
return ligand_coords
@ray.remote(num_cpus=1)
def process_jobs(self, id, jobs_queue, actor):
print("start process", id)
while not jobs_queue.empty():
job = jobs_queue.get()
try:
self.execute_one_job(job)
except:
print(f"failed: {job}")
traceback.print_exception(*sys.exc_info())
try:
actor.update.remote(1)
except:
pass
return 1
def execute_one_job(self, id):
# print("#######################")
print(id)
# get the sequence from pdb
fasta_dir = self.homoaug_dir + "/" + id + "/" + id + ".fasta"
with open(fasta_dir) as f:
fasta = f.readlines()
# read the pocket position
pocket_position_file = (
self.homoaug_dir + "/" + id + "/" + id + "_pocket_position.txt"
)
if not os.path.exists(pocket_position_file):
print("position_file not exist")
return 1
with open(pocket_position_file) as f:
pocket_position = f.readline().strip()
# create rotation matrix dir
rotation_matrix_dir = self.homoaug_dir + "/" + id + "/" + "rotation_matrix/"
if not os.path.exists(rotation_matrix_dir):
os.makedirs(rotation_matrix_dir)
# get the sequence from TMalign
chain_pdb_file = self.homoaug_dir + "/" + id + "/" + id + "_pocket_chain.pdb"
MSA_file = self.MSA_dir + f"/{id}" + ".fasta"
if not os.path.exists(MSA_file):
print("MSA_file not exist")
return 1
MSA_ids = []
with open(MSA_file) as f:
lines = f.readlines()
for idx in range(0, len(lines), 2):
MSA_ids.append(lines[idx].strip().split(" ")[-1])
# create extend dir
extend_dir = self.homoaug_dir + "/" + id + "/" + "extend"
if not os.path.exists(extend_dir):
os.mkdir(extend_dir)
# get ligand
ligand_file = self.homoaug_dir + "/" + id + "/" + id + "_ligand.pdb"
ligand_coords = self._read_ligand_pdb(ligand_file)
# TMalign
cnt = len(glob.glob(extend_dir + "/*"))
for MSA_id in list(MSA_ids):
if cnt >= self.max_extend_num:
break
# calculate TMscore
MSA_pdb_file = self.AF2DB_dir + f"/{MSA_id}.pdb"
if not os.path.exists(MSA_pdb_file):
continue
rotation_matrix_file = rotation_matrix_dir + f"{MSA_id}.txt"
if os.path.exists(rotation_matrix_file):
continue
out_bytes = subprocess.check_output(
["TMalign", MSA_pdb_file, chain_pdb_file, "-m", rotation_matrix_file]
)
out_text = out_bytes.decode("utf-8").strip().split("\n")
TMscore1 = float(out_text[12].split(" ")[1])
TMscore2 = float(out_text[13].split(" ")[1])
(
sequence_from_TMalign,
MSA_aligned_sequence,
) = self._remove_gap_of_primary_sequence(out_text[19], out_text[17])
TMalign_file = rotation_matrix_dir + f"{MSA_id}_TMscore.txt"
with open(TMalign_file, "w") as f:
f.write("TMscore normalized to chain_pdb:" + str(TMscore2) + "\n")
f.write("TMscore normalized to MSA_pdb:" + str(TMscore1) + "\n")
f.write("Aligned sequence : \n")
f.write(sequence_from_TMalign + "\n")
f.write(MSA_aligned_sequence + "\n")
TMscore = TMscore2
# calculate Match_score
Aligned_seq = MSA_aligned_sequence
Match_rate = self._calc_match_rate(pocket_position, Aligned_seq)
# print("MSA_id:",MSA_id)
# print("TMscore:",TMscore)
# print("Match_rate:",Match_rate)
if (
TMscore >= self.TMscore_threshold
and Match_rate >= self.Match_rate_threshold
):
extend_instance_dir = extend_dir + "/" + MSA_id + "/"
# if os.path.exists(extend_instance_dir):
# continue
os.mkdir(extend_instance_dir)
# read ori MSA pdb file
MSA_pdb_file = self.AF2DB_dir + f"/{MSA_id}" + ".pdb"
parser = Bio.PDB.PDBParser()
structure = parser.get_structure(MSA_id, MSA_pdb_file)
model = structure[0]
for chain in model:
MSA_chain_id = chain.id
break
MSA_chain = model[MSA_chain_id]
# get rotate_matrix
rotation_matrix_file = rotation_matrix_dir + f"{MSA_id}.txt"
rotation_matrix = self._get_rotate_matrix(rotation_matrix_file)
for residue in MSA_chain:
for atom in residue:
coord = atom.get_coord()
coord = np.array(coord)
new_coord = (
np.dot(rotation_matrix[0], coord) + rotation_matrix[1]
)
atom.set_coord(new_coord)
# write new pdb file
io = Bio.PDB.PDBIO()
io.set_structure(structure)
io.save(extend_instance_dir + f"{MSA_id}" + "_protein.pdb")
# get pocket , which is in the 6A of ligand
MSA_pocket_file = extend_instance_dir + f"{MSA_id}" + "_pocket.pdb"
for residue in MSA_chain:
remove_atom_ids = []
for atom in residue:
# print("atom: ",atom.id)
coord = atom.get_coord()
f = 0
for ligand_coord in ligand_coords:
dis = np.linalg.norm(coord - ligand_coord)
if np.linalg.norm(coord - ligand_coord) <= 6:
f = 1
break
if f == 0:
remove_atom_ids.append(atom.id)
for atom_id in remove_atom_ids:
residue.detach_child(atom_id)
io = Bio.PDB.PDBIO()
io.set_structure(structure)
io.save(MSA_pocket_file)
cnt += 1
print("finish: pdb_id:", id)
return 1
def run(self):
all_jobs = []
for id in os.listdir(self.homoaug_dir):
if (
os.path.isfile(self.homoaug_dir + "/" + id + "/" + id + "_ligand.pdb")
and os.path.isfile(
self.homoaug_dir + "/" + id + "/" + id + "_pocket.pdb"
)
and os.path.isfile(
self.homoaug_dir + "/" + id + "/" + id + "_pocket_position.txt"
)
):
all_jobs.append(id)
uncompleted_jobs = all_jobs
job_queue = Queue()
for job in tqdm(uncompleted_jobs):
job_queue.put(job)
print("job queue size:", job_queue.qsize())
pb = ProgressBar(len(all_jobs))
actor = pb.actor
job_id_list = []
self.n_thread = min(self.n_thread, len(uncompleted_jobs))
for i in range(self.n_thread):
job_id_list.append(self.process_jobs.remote(self, i, job_queue, actor))
pb.print_until_done()
result = ray.get(job_id_list)
ray.shutdown()
print("Done!")
class HomoAugRunner:
def __init__(self, args):
self.id_file = args.id_file
self.homoaug_dir = args.homoaug_dir
self.fasta_file = args.fasta_file
self.protein_pdb_dir = args.protein_pdb_dir
self.pocket_pdbs_dir = args.pocket_pdbs_dir
self.jackhmmer_output_dir = args.jackhmmer_output_dir
self.database_fasta_path = args.database_fasta_path
self.max_extend_num = args.max_extend_num
self.database_pdb_dir = args.database_pdb_dir
self.TMscore_threshold = args.TMscore_threshold
self.Match_rate_threshold = args.Match_rate_threshold
self.n_thread = args.n_thread
def read_dataset_fasta(self):
protein_seq = {}
with open(self.fasta_file) as f:
fasta = f.readlines()
for i in range(0, len(fasta), 2):
id = fasta[i].strip()
id = id[1:5].upper()
seq = fasta[i + 1].strip()
protein_seq[id] = seq
return protein_seq
def create_dir(self):
# Create homoaug dir and subdirs
# Dir format
# homoaug_dir
# └── id
# ├── id.fasta
# └── id_protein.pdb
# └── id_pocket.pdb
protein_seq = self.read_dataset_fasta()
if not os.path.exists(self.homoaug_dir):
os.mkdir(self.homoaug_dir)
with open(self.id_file, "r") as f:
lines = f.readlines()
for line in tqdm(lines):
id = line.strip()
id = id[:4].upper()
if not os.path.exists(self.homoaug_dir + "/" + id):
os.mkdir(self.homoaug_dir + "/" + id)
# create fasta
with open(self.homoaug_dir + "/" + id + "/" + id + ".fasta", "w") as f:
f.write(">" + id + "\n" + protein_seq[id] + "\n")
# copy pdb
cif_file = glob.glob(self.protein_pdb_dir + "/" + id + "*")
if len(cif_file) != 1:
print("error : having more than 1 cif file", id)
continue
cif_file = cif_file[0]
# read cif
parser = Bio.PDB.MMCIFParser(QUIET=True)
structure = parser.get_structure(id, cif_file)
# get number of models
n_model = len(structure)
if n_model != 1:
print("error : having more than 1 model", id)
continue
# get number of chains
n_chain = len(list(structure.get_chains()))
if n_chain != 1:
print("error : having more than 1 chain", id)
continue
# save pdb
io = Bio.PDB.PDBIO()
io.set_structure(structure)
io.save(self.homoaug_dir + "/" + id + "/" + id + "_protein.pdb")
# os.system("cp "+homoaug_dir+"/"+id+"/"+id+"_protein.pdb "+homoaug_dir+"/"+id+"/"+id+"_pocket_chain.pdb")
# copy pocket
pocket_file = glob.glob(self.pocket_pdbs_dir + "/" + id + "*")
if len(pocket_file) != 1:
print("error : having more than 1 pocket file", id)
continue
pocket_file = pocket_file[0]
# copy to homoaug
os.system(
"cp "
+ pocket_file
+ " "
+ self.homoaug_dir
+ "/"
+ id
+ "/"
+ id
+ "_pocket.pdb"
)
def run(self):
self.create_dir()
# Run jackhmmer
print("# Start running jackhmmer")
jackhmmer_runner = JackhmmerRunner(
database_dir=self.database_fasta_path,
task_ids=self.id_file,
seq_dir=self.homoaug_dir,
output_dir=self.jackhmmer_output_dir,
n_thread=self.n_thread,
)
jackhmmer_runner.run_jackhmmer()
print("# Start running ligand pocket extractor")
ligand_pocket_extractor = LigandPocketExtractor(
self.id_file, self.homoaug_dir, self.n_thread
)
ligand_pocket_extractor.run()
print("# Start running pocket position extractor")
pocket_position_extractor = PocketPositionExtractor(
self.homoaug_dir, self.n_thread
)
pocket_position_extractor.run()
print("# Start running TMalign")
tmalign_runner = TMalignRunner(
self.max_extend_num,
self.homoaug_dir,
self.jackhmmer_output_dir,
self.database_pdb_dir,
self.TMscore_threshold,
self.Match_rate_threshold,
self.n_thread,
)
tmalign_runner.run()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--id_file", type=str, default="/drug/BioLip/tmp.id")
parser.add_argument("--homoaug_dir", type=str, default="/drug/BioLip/homoaug_new")
parser.add_argument(
"--fasta_file",
type=str,
default="/drug/BioLip/BioLiP_v2023-04-13_regularLigand.fasta",
)
parser.add_argument(
"--protein_pdb_dir", type=str, default="/drug/BioLip/protein_pdb"
)
parser.add_argument(
"--pocket_pdbs_dir", type=str, default="/drug/BioLip/pocket_pdb"
)
parser.add_argument(
"--jackhmmer_output_dir", type=str, default="/drug/BioLip/pdbbind_MSA_fasta"
)
parser.add_argument(
"--n_thread", type=int, default=10, help="number of threads for running"
)
parser.add_argument(
"--database_fasta_path",
type=str,
default="/data/protein/AF2DB/AFDB_HC_50.fa",
help="jackhmmer search database, in fasta format",
)
parser.add_argument(
"--database_pdb_dir",
type=str,
default="/drug/AFDB_HC_50_PDB",
help="homoaug search database, e.g. AF2DB",
)
parser.add_argument(
"--max_extend_num",
type=int,
default=20,
help="max number of extended pocket-ligand pairs for one real pocket-ligand pair",
)
parser.add_argument(
"--TMscore_threshold",
type=float,
default=0.4,
help="TMscore threshold for extending",
)
parser.add_argument(
"--Match_rate_threshold",
type=float,
default=0.4,
help="Match_rate threshold for extending",
)
args = parser.parse_args()
homoaug_runner = HomoAugRunner(args)
homoaug_runner.run()
print("HomoAug Done!")