deepG / Git / Diff of /man/n_gram

Models:
MarcoTheBlack/
deepG
Downloads: 2
Diff of /man/n_gram_dist.Rd [000000] .. [409433]
Switch to side-by-side view

--- a
+++ b/man/n_gram_dist.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/n_gram.R
+\name{n_gram_dist}
+\alias{n_gram_dist}
+\title{Get distribution of n-grams}
+\usage{
+n_gram_dist(
+  path_input,
+  n = 2,
+  vocabulary = c("A", "C", "G", "T"),
+  format = "fasta",
+  file_sample = NULL,
+  step = 1,
+  nuc_dist = FALSE
+)
+}
+\arguments{
+\item{path_input}{Path to a folder containing fasta files or to a single fasta file.}
+
+\item{n}{Size of the n-gram.}
+
+\item{vocabulary}{Vector of allowed characters; samples outside the vocabulary are discarded.}
+
+\item{format}{File format, either \code{"fasta"} or \code{"fastq"}.}
+
+\item{file_sample}{If integer, size of random sample of files in \code{path_input}.}
+
+\item{step}{How often to take a sample.}
+
+\item{nuc_dist}{Logical flag relating to the nucleotide distribution (default \code{FALSE}).}
+}
+\value{
+Returns a matrix with distributions of nucleotides given the previous n nucleotides.
+
+A data frame of n-gram predictions.
+}
+\description{
+Get distribution of next character given previous n nucleotides.
+}
+\examples{
+temp_dir <- tempfile()
+dir.create(temp_dir)
+create_dummy_data(file_path = temp_dir,
+                  num_files = 3,
+                  seq_length = 80,
+                  vocabulary = c("A", "C", "G", "T"),
+                  num_seq = 2)
+
+m <- n_gram_dist(path_input = temp_dir,
+                 n = 3,
+                 step = 1,
+                 nuc_dist = FALSE)
+head(round(m, 2))
+}