--- a +++ b/man/n_gram_dist.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/n_gram.R +\name{n_gram_dist} +\alias{n_gram_dist} +\title{Get distribution of n-grams} +\usage{ +n_gram_dist( + path_input, + n = 2, + vocabulary = c("A", "C", "G", "T"), + format = "fasta", + file_sample = NULL, + step = 1, + nuc_dist = FALSE +) +} +\arguments{ +\item{path_input}{Path to a folder containing fasta files or to a single fasta file.} + +\item{n}{Size of n-gram.} + +\item{vocabulary}{Vector of allowed characters; samples outside the vocabulary get discarded.} + +\item{format}{File format, either \code{"fasta"} or \code{"fastq"}.} + +\item{file_sample}{If integer, size of random sample of files in \code{path_input}.} + +\item{step}{How often to take a sample.} + +\item{nuc_dist}{Nucleotide distribution.} +} +\value{ +Returns a matrix with distributions of nucleotides given the previous n nucleotides. + +A data frame of n-gram predictions. +} +\description{ +Get distribution of the next character given the previous n nucleotides. +} +\examples{ +temp_dir <- tempfile() +dir.create(temp_dir) +create_dummy_data(file_path = temp_dir, + num_files = 3, + seq_length = 80, + vocabulary = c("A", "C", "G", "T"), + num_seq = 2) + +m <- n_gram_dist(path_input = temp_dir, + n = 3, + step = 1, + nuc_dist = FALSE) +head(round(m, 2)) +}