deepG / Git / [1c0e03] /man/n_gram

Models:

MarcoTheBlack/

deepG

Downloads: 2

[1c0e03]: / man / n_gram_dist.Rd

History

Download this file

55 lines (46 with data), 1.4 kB

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/n_gram.R
\name{n_gram_dist}
\alias{n_gram_dist}
\title{Get distribution of n-grams}
\usage{
n_gram_dist(
  path_input,
  n = 2,
  vocabulary = c("A", "C", "G", "T"),
  format = "fasta",
  file_sample = NULL,
  step = 1,
  nuc_dist = FALSE
)
}
\arguments{
\item{path_input}{Path to folder containing fasta files or single fasta file.}

\item{n}{Size of the n-gram, i.e. the number of previous nucleotides used as context.}

\item{vocabulary}{Vector of allowed characters; samples containing characters outside the vocabulary are discarded.}

\item{format}{File format, either \code{"fasta"} or \code{"fastq"}.}

\item{file_sample}{If integer, size of random sample of files in \code{path_input}.}

\item{step}{How often to take a sample.}

\item{nuc_dist}{Nucleotide distribution.}
}
\value{
Returns a matrix with distributions of nucleotides given the previous n nucleotides.

A data frame of n-gram predictions.
}
\description{
Get the distribution of the next character given the previous n nucleotides.
}
\examples{
temp_dir <- tempfile()
dir.create(temp_dir)
create_dummy_data(file_path = temp_dir,
                  num_files = 3,
                  seq_length = 80,
                  vocabulary = c("A", "C", "G", "T"),
                  num_seq = 2)

m <- n_gram_dist(path_input = temp_dir,
                 n = 3,
                 step = 1,
                 nuc_dist = FALSE)
head(round(m, 2))
}