% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vs_usearch_global.R
\name{vs_usearch_global}
\alias{vs_usearch_global}
\alias{usearch_global}
\alias{global_alignment}
\title{Global pairwise alignment}
\usage{
vs_usearch_global(
  fastx_input,
  database,
  userout = NULL,
  otutabout = NULL,
  userfields = "query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits",
  gapopen = "20I/2E",
  gapext = "2I/1E",
  id = 0.7,
  strand = "plus",
  maxaccepts = 1,
  maxrejects = 32,
  threads = 1,
  vsearch_options = NULL,
  tmpdir = NULL
)
}
\arguments{
\item{fastx_input}{(Required). A FASTA/FASTQ file path or FASTA/FASTQ object.
See \emph{Details}.}

\item{database}{(Required). A FASTA/FASTQ file path or FASTA/FASTQ tibble
object containing the target sequences.}

\item{userout}{(Optional). A character string specifying the name of the
output file for the alignment results. If \code{NULL} (default), no output is
written to a file and the results are returned as a tibble with the columns
specified in \code{userfields}. See \emph{Details}.}

\item{otutabout}{(Optional). A character string specifying the name of the
output file in OTU table format, or \code{TRUE}. If \code{NULL} (default), no
output is written to a file. If \code{TRUE}, the output is returned as a
tibble. See \emph{Details}.}

\item{userfields}{(Optional). Fields to include in the output file. Defaults
to \code{"query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits"}.
See \emph{Details}.}

\item{gapopen}{(Optional). Penalties for gap opening. Defaults to
\code{"20I/2E"}. See \emph{Details}.}

\item{gapext}{(Optional). Penalties for gap extension. Defaults to
\code{"2I/1E"}. See \emph{Details}.}

\item{id}{(Optional). Pairwise identity threshold. Defines the minimum
identity required for matches. Defaults to \code{0.7}.}

\item{strand}{(Optional). Specifies which strand to consider when comparing
sequences. Can be either \code{"plus"} (default) or \code{"both"}.}

\item{maxaccepts}{(Optional). Maximum number of matching target sequences to
accept before stopping the search for a given query. Defaults to \code{1}.}

\item{maxrejects}{(Optional). Maximum number of non-matching target sequences
to consider before stopping the search for a given query. Defaults to
\code{32}. If \code{maxaccepts} and \code{maxrejects} are both set to
\code{0}, the complete database is searched.}

\item{threads}{(Optional). Number of computational threads to be used by
\code{VSEARCH}. Defaults to \code{1}.}

\item{vsearch_options}{(Optional). Additional arguments to pass to
\code{VSEARCH}. Defaults to \code{NULL}. See \emph{Details}.}

\item{tmpdir}{(Optional). Path to the directory where temporary files should
be written when tables are used as input or output. Defaults to
\code{NULL}, which resolves to the session-specific temporary directory
(\code{tempdir()}).}
}
\value{
A tibble or \code{NULL}.

If \code{userout} is specified, the alignment results are written to the
specified file, and no tibble is returned. If \code{userout} is \code{NULL}, a
tibble containing the alignment results with the fields specified by
\code{userfields} is returned.

If \code{otutabout} is \code{TRUE}, an OTU table is returned as a tibble.
If \code{otutabout} is a character string, the output is written to the file,
and no tibble is returned.
}
\description{
\code{vs_usearch_global} performs global pairwise alignment of query
sequences against target sequences using \code{VSEARCH}.
}
\details{
Performs global pairwise alignment between query and target sequences using
\code{VSEARCH}, and reports matches based on the specified pairwise identity
threshold (\code{id}). Only alignments that meet or exceed the identity
threshold are included in the output.

\code{fastx_input} and \code{database} can either be file paths to
FASTA/FASTQ files or FASTA/FASTQ objects. FASTA objects are tibbles that
contain the columns \code{Header} and \code{Sequence}, see
\code{\link[microseq]{readFasta}}. FASTQ objects are tibbles that contain the
columns \code{Header}, \code{Sequence}, and \code{Quality}, see
\code{\link[microseq]{readFastq}}.

\code{userfields} specifies the fields to include in the output file. Fields
must be given as a character string separated by \code{"+"}. The default
value of \code{userfields} equals
\code{"query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits"}, which
gives a BLAST-like tab-separated format of twelve fields. See the
'Userfields' section in the \code{VSEARCH} manual for more information.

\code{otutabout} gives the option to output the results in an OTU
table format with tab-separated columns. When writing to a file, the first
line starts with the string "#OTU ID", followed by a tab-separated list of
all sample identifiers (formatted as "sample=X"). Each subsequent line,
corresponding to an OTU, begins with the OTU identifier and is followed by
tab-separated abundances for that OTU in each sample. If \code{otutabout} is
a character string, the output is written to the specified file. If
\code{otutabout} is \code{TRUE}, the function returns the OTU table as a
tibble, where the first column is named \code{otu_id} instead of "#OTU ID".

Pairwise identity (\code{id}) is calculated as the number of matching columns
divided by the alignment length minus terminal gaps.

\code{vsearch_options} allows users to pass additional command-line arguments
to \code{VSEARCH} that are not directly supported by this function. Refer to
the \code{VSEARCH} manual for more details.

Visit the \code{VSEARCH}
\href{https://github.com/torognes/vsearch?tab=readme-ov-file#getting-help}{documentation}
for information about defining \code{gapopen} and \code{gapext}.
}
\examples{
\dontrun{
# Locate the example FASTA file shipped with the package.
# The same file is used as both query and database purely for illustration;
# you would typically use something else as database.
query_file <- system.file("extdata", "small.fasta", package = "Rsearch")
db <- query_file

# Write the results to a temporary file so the example does not leave
# anything behind in the working directory
out_file <- tempfile(fileext = ".txt")

# Run global pairwise alignment with default parameters and write results to file
vs_usearch_global(fastx_input = query_file,
                  database = db,
                  userout = out_file)

# Read results, and give column names (same order as the default userfields)
result.tbl <- read.table(out_file,
                         sep = "\t",
                         header = FALSE,
                         col.names = c("query", "target", "id", "alnlen",
                                       "mism", "opens", "qlo", "qhi",
                                       "tlo", "thi", "evalue", "bits"))

# Remove the temporary results file
unlink(out_file)
}

}
\references{
\url{https://github.com/torognes/vsearch}
}
