--- a
+++ b/man/predict_with_n_gram.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/n_gram.R
+\name{predict_with_n_gram}
+\alias{predict_with_n_gram}
+\title{Predict the next nucleotide using n-grams}
+\usage{
+predict_with_n_gram(
+  path_input,
+  distribution_matrix,
+  default_pred = "random",
+  vocabulary = c("A", "C", "G", "T"),
+  file_sample = NULL,
+  format = "fasta",
+  return_data_frames = FALSE,
+  step = 1
+)
+}
+\arguments{
+\item{path_input}{Path to a folder containing fasta files or to a single fasta file.}
+
+\item{distribution_matrix}{A data frame containing the frequency of the next nucleotide given the previous n nucleotides (output of the \code{\link{n_gram_dist}} function).}
+
+\item{default_pred}{Either a character from \code{vocabulary} or \code{"random"}. Used as the prediction if a certain n-gram did not appear before.
+If \code{"random"}, a random prediction is assigned.}
+
+\item{vocabulary}{Vector of allowed characters. Samples outside the vocabulary are discarded.}
+
+\item{file_sample}{If integer, size of random sample of files in \code{path_input}.}
+
+\item{format}{File format, either \code{"fasta"} or \code{"fastq"}.}
+
+\item{return_data_frames}{Boolean, whether to return a data frame with input, predictions, target position and true target.}
+
+\item{step}{How often to take a sample.}
+}
+\value{
+List of prediction evaluations.
+}
+\description{
+Predict the next nucleotide using n-grams.
+}
+\examples{
+# create dummy fasta files
+temp_dir <- tempfile()
+dir.create(temp_dir)
+create_dummy_data(file_path = temp_dir,
+                  num_files = 3,
+                  seq_length = 8,
+                  vocabulary = c("A", "C", "G", "T"),
+                  num_seq = 2)
+
+m <- n_gram_dist(path_input = temp_dir,
+                 n = 3,
+                 step = 1,
+                 nuc_dist = FALSE)
+
+# use distribution matrix to make predictions for one file
+predictions <- predict_with_n_gram(path_input = list.files(temp_dir, full.names = TRUE)[1], 
+                                   distribution_matrix = m)
+
+# show accuracy
+predictions[[1]]
+
+}