[548210]: / openomics / io / read_fasta.py

Download this file

14 lines (10 with data), 363 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
import dask.dataframe as dd
from pyfaidx import Faidx
def read_fasta(filepath, ) -> dd.DataFrame:
    """
    Open a .fasta file through ``Faidx`` as the first step towards a Dask DataFrame.

    The intended end result is a lazy-loading Dask DataFrame keyed on the
    parsed index names, with the sequence as one of its columns. Only the
    first step exists so far: a ``Faidx`` object is constructed from
    ``filepath``.

    NOTE(review): there is no ``return`` statement yet, so callers currently
    receive ``None`` despite the ``dd.DataFrame`` annotation.

    Args:
        filepath (): Path of the .fasta file; passed unchanged to ``Faidx``.
    """
    # Presumably this is the call that indexes the file -- confirm against pyfaidx.
    fasta_index = Faidx(filepath)