% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mst.R
\name{mst}
\alias{mst}
\alias{mst.default}
\alias{mst.dist}
\title{Euclidean and Mutual Reachability Minimum Spanning Trees}
\usage{
mst(d, ...)

\method{mst}{default}(
  d,
  M = 0L,
  distance = c("euclidean", "l2", "manhattan", "cityblock", "l1", "cosine"),
  verbose = FALSE,
  ...
)

\method{mst}{dist}(d, M = 0L, verbose = FALSE, ...)
}
\arguments{
\item{d}{either a numeric matrix (or an object coercible to one,
e.g., a data frame with numeric-like columns) or an
object of class \code{dist}; see \code{\link[stats]{dist}}}

\item{...}{further arguments passed to \code{\link[quitefastmst]{mst_euclid}}
from \pkg{quitefastmst}}

\item{M}{smoothing factor; \eqn{M=0} selects the requested
\code{distance}; otherwise, the corresponding degree-\code{M} mutual
reachability distance is used}

\item{distance}{metric used in the case where \code{d} is a matrix; one of:
\code{"euclidean"} (synonym: \code{"l2"}),
\code{"manhattan"} (a.k.a. \code{"l1"} and \code{"cityblock"}),
\code{"cosine"}}

\item{verbose}{logical; whether to print diagnostic messages
and progress information}
}
\value{
Returns a numeric matrix of class \code{mst} with \eqn{n-1} rows and
three columns: \code{from}, \code{to}, and \code{dist}.
Its \eqn{i}-th row specifies the \eqn{i}-th edge of the MST
which is incident to the vertices \code{from[i]} and \code{to[i]} with
\code{from[i] < to[i]}  (in \eqn{1,...,n}) and \code{dist[i]} gives
the corresponding weight, i.e., the distance between the point pair.
Edges are ordered increasingly with respect to their weights.

The \code{Size} attribute specifies the number of points, \eqn{n}.
The \code{Labels} attribute gives the labels of the input points,
if available.
The \code{method} attribute provides the name of the distance function used.

If \eqn{M>0}, the \code{nn.index} attribute gives the indexes
of the \code{M} nearest neighbours of each point
and \code{nn.dist} provides the corresponding distances,
both in the form of an \eqn{n} by \eqn{M} matrix.
}
\description{
A Euclidean minimum spanning tree (MST) provides a computationally
convenient representation of a dataset: the \eqn{n} points are connected
via \eqn{n-1} shortest segments.  Provided that the dataset
has been appropriately preprocessed so that the distances between the
points are informative, an MST can be applied in outlier detection,
clustering, density estimation, dimensionality reduction, and many other
topological data analysis tasks.
}
\details{
If \code{d} is a matrix and the Euclidean distance is requested
(the default), then the MST is computed via a call to
\code{\link[quitefastmst]{mst_euclid}} from \pkg{quitefastmst}.
It is efficient in low-dimensional spaces.  Otherwise, a general-purpose
implementation of the Jarník (Prim/Dijkstra)-like \eqn{O(n^2)}-time algorithm
is called.

If \eqn{M>0}, then the minimum spanning tree is computed with respect to
a mutual reachability distance (Campello et al., 2013):
\eqn{d_M(i,j)=\max(d(i,j), c_M(i), c_M(j))}, where \eqn{d(i,j)} is
an ordinary distance and \eqn{c_M(i)} is the core distance given by
\eqn{d(i,k)} with \eqn{k} being \eqn{i}'s \eqn{M}-th nearest neighbour
(not including self, unlike in Campello et al., 2013).
This pulls outliers away from their neighbours.

If the distances are not unique, there might be multiple trees
spanning a given graph that meet the minimality property.
}
\examples{
library("datasets")
data("iris")
X <- jitter(as.matrix(iris[1:2]))  # some data
T <- mst(X)
plot(X, asp=1, las=1)
segments(X[T[, 1], 1], X[T[, 1], 2],
         X[T[, 2], 1], X[T[, 2], 2])

}
\references{
V. Jarník, O jistem problemu minimalnim (z dopisu panu O. Borůvkovi),
\emph{Prace Moravske Prirodovedecke Spolecnosti} 6, 1930, 57-63

C.F. Olson, Parallel algorithms for hierarchical clustering,
\emph{Parallel Computing} 21, 1995, 1313-1325

R. Prim, Shortest connection networks and some generalisations,
\emph{The Bell System Technical Journal} 36(6), 1957, 1389-1401

O. Borůvka, O jistém problému minimálním, \emph{Práce Moravské
Přírodovědecké Společnosti} 3, 1926, 37–58

J.L. Bentley, Multidimensional binary search trees used for associative
searching, \emph{Communications of the ACM} 18(9), 509–517, 1975,
\doi{10.1145/361002.361007}
W.B. March, R. Parikshit, A. Gray, Fast Euclidean minimum spanning
tree: Algorithm, analysis, and applications, \emph{Proc. 16th ACM SIGKDD
Intl. Conf. Knowledge Discovery and Data Mining (KDD '10)}, 2010, 603–612

R.J.G.B. Campello, D. Moulavi, J. Sander,
Density-based clustering based on hierarchical density estimates,
\emph{Lecture Notes in Computer Science} 7819, 2013, 160-172,
\doi{10.1007/978-3-642-37456-2_14}

M. Gagolewski, quitefastmst, in preparation, 2026, TODO
}
\seealso{
The official online manual of \pkg{deadwood} at \url{https://deadwood.gagolewski.com/}

\code{\link[quitefastmst]{mst_euclid}}
}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski}
}
