--- a +++ b/man/split_fasta.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocess.R +\name{split_fasta} +\alias{split_fasta} +\title{Split fasta file into smaller files.} +\usage{ +split_fasta( + path_input, + target_folder, + split_n = 500, + shuffle_entries = TRUE, + delete_input = FALSE +) +} +\arguments{ +\item{path_input}{Fasta file to split into smaller files.} + +\item{target_folder}{Directory for output.} + +\item{split_n}{Maximum number of entries to use in each smaller file.} + +\item{shuffle_entries}{Whether to shuffle fasta entries before split.} + +\item{delete_input}{Whether to delete the original file.} +} +\value{ +None. Writes files to output. +} +\description{ +Writes smaller files with the same file name plus a "_x" suffix (where x is an integer). For example, +assume we have an input file called "abc.fasta" with 100 entries and \code{split_n = 50}. The function will +create two files called "abc_1.fasta" and "abc_2.fasta" in \code{target_folder}. +} +\examples{ +path_input <- tempfile(fileext = '.fasta') +create_dummy_data(file_path = path_input, + num_files = 1, + write_to_file_path = TRUE, + seq_length = 7, + num_seq = 25, + vocabulary = c("a", "c", "g", "t")) +target_folder <- tempfile() +dir.create(target_folder) + +# split 25 entries into 5 files +split_fasta(path_input = path_input, + target_folder = target_folder, + split_n = 5) +length(list.files(target_folder)) + +}