% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/score_anomaly.R
\name{score_anomaly}
\alias{score_anomaly}
\title{Score Anomalies Using Unsupervised Machine Learning}
\description{
Calculates anomaly scores for each record using Isolation Forest or
Local Outlier Factor algorithms. Optionally evaluates performance against
ground truth labels for benchmarking.
}
\usage{
score_anomaly(
  data,
  method = "iforest",
  contamination = 0.05,
  ground_truth_col = NULL,
  id_cols = NULL,
  exclude_cols = NULL,
  ...
)
}
\arguments{
\item{data}{A data frame containing the data to be scored.}

\item{method}{Character string indicating the anomaly detection method.
Options: \code{"iforest"} (Isolation Forest, default) or \code{"lof"} (Local Outlier Factor).}

\item{contamination}{Numeric value between 0 and 1 indicating the expected
proportion of anomalies in the data. Default is 0.05 (5\%).}

\item{ground_truth_col}{Character string naming a column in \code{data} that
contains binary ground truth labels (\code{0}/\code{1} or \code{FALSE}/\code{TRUE})
for known anomalies. If provided, benchmarking metrics will be calculated.
Default is \code{NULL}.}

\item{id_cols}{Character vector of column names to exclude from scoring.
Passed to \code{prep_for_anomaly()}. Default is \code{NULL}.}

\item{exclude_cols}{Character vector of additional columns to exclude.
Passed to \code{prep_for_anomaly()}. Default is \code{NULL}.}

\item{...}{Additional arguments passed to the underlying algorithm.
For Isolation Forest: \code{ntrees}, \code{sample_size}, \code{max_depth}.
For LOF: \code{minPts} (number of neighbors; the deprecated \code{k} is
still accepted and converted to \code{minPts}).}
}
\value{
A data frame with the original data plus an \code{anomaly_score} column.
  If \code{ground_truth_col} is provided, the result includes an attribute
  \code{benchmark_metrics} containing: \code{auc_roc} (area under the ROC curve),
  \code{auc_pr} (area under the precision-recall curve), \code{top_k_recall}
  (a list of recall values for the top K records, with K = 10, 50, 100, and 500),
  and \code{contamination_rate} (the actual proportion flagged as anomalous).
}
\examples{
\donttest{
data <- data.frame(
  patient_id = 1:50,
  age = rnorm(50, 50, 15),
  cost = rnorm(50, 10000, 5000)
)
scored_data <- score_anomaly(data, method = "iforest", contamination = 0.05)
}
}
