% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prep_for_anomaly.R
\name{prep_for_anomaly}
\alias{prep_for_anomaly}
\title{Prepare Data for Anomaly Detection}
\usage{
prep_for_anomaly(
  data,
  id_cols = NULL,
  exclude_cols = NULL,
  scale_method = "mad"
)
}
\arguments{
\item{data}{A data frame containing the data to be preprocessed.}

\item{id_cols}{Character vector of column names to exclude from scoring
(e.g., patient IDs, encounter IDs). If \code{NULL}, attempts to auto-detect
common ID column patterns.}

\item{exclude_cols}{Character vector of additional columns to exclude from
scoring. Default is \code{NULL}.}

\item{scale_method}{Character string indicating the scaling method for
numerical variables. Options: \code{"mad"} (Median Absolute Deviation, default),
\code{"minmax"} (min-max normalization), or \code{"none"} (no scaling).}
}
\value{
A list containing:
  \item{prepared_data}{A numeric matrix ready for anomaly detection}
  \item{metadata}{A list with mapping information:
    \itemize{
      \item original_data: The original data frame
      \item id_cols: Column names used as identifiers
      \item numeric_cols: Column names of numeric variables
      \item categorical_cols: Column names of categorical variables
      \item excluded_cols: Column names excluded from scoring
    }
  }
}
\description{
Preprocesses data for unsupervised anomaly detection by handling identifiers,
scaling numerical features, and encoding categorical variables.
}
\examples{
# Fix the RNG seed so the simulated data, and therefore the example
# output, is identical on every run.
set.seed(42)

# Simulated data: one identifier column, two numeric features, and one
# categorical feature.
data <- data.frame(
  patient_id = 1:20,
  age = rnorm(20, 50, 15),
  cost = rnorm(20, 10000, 5000),
  gender = sample(c("M", "F"), 20, replace = TRUE)
)

# patient_id is passed as an identifier column so it is excluded from scoring.
prep_result <- prep_for_anomaly(data, id_cols = "patient_id")
}
