% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/validation.R
\name{holdout_validation}
\alias{holdout_validation}
\title{Perform holdout validation for PTSD diagnostic models}
\usage{
holdout_validation(
  data,
  train_ratio = 0.7,
  score_by = "newly_nondiagnosed",
  seed = 123
)
}
\arguments{
\item{data}{A dataframe containing exactly 20 columns with PCL-5 item scores
(output of rename_ptsd_columns). Each symptom should be scored on a 0-4 scale.}

\item{train_ratio}{Numeric between 0 and 1 indicating proportion of data for training
(default: 0.7 for 70/30 split)}

\item{score_by}{Character string specifying optimization criterion:
\itemize{
  \item "false_cases": Minimize total misclassifications
  \item "newly_nondiagnosed": Minimize false negatives only (default)
}}

\item{seed}{Integer for random number generation reproducibility (default: 123)}
}
\value{
A list containing:
\itemize{
  \item without_clusters: Results for model without cluster representation
    \itemize{
      \item best_combinations: The 3 best six-symptom combinations from training
      \item test_results: Diagnostic comparison on test data
      \item summary: Formatted summary statistics
    }
  \item with_clusters: Results for model with cluster representation
    \itemize{
      \item best_combinations: The 3 best six-symptom combinations from training
      \item test_results: Diagnostic comparison on test data
      \item summary: Formatted summary statistics
    }
}
}
\description{
Validates PTSD diagnostic models using a train-test split approach (holdout validation).
Trains the model on a portion of the data and evaluates performance on the held-out test set.
}
\details{
The function:
\enumerate{
  \item Splits data into training and test sets according to \code{train_ratio} (70\%/30\% by default)
  \item Finds optimal symptom combinations on training data
  \item Evaluates these combinations on test data
  \item Compares results to original DSM-5 diagnoses
}

Two models are evaluated:
\itemize{
  \item Model without cluster representation: Any 4 of 6 symptoms
  \item Model with cluster representation: 4 of 6 symptoms with at least one from each cluster
}
}
\examples{
# Create sample data
set.seed(42)
sample_data <- data.frame(
  matrix(sample(0:4, 20 * 200, replace = TRUE),
         nrow = 200,
         ncol = 20)
)
colnames(sample_data) <- paste0("symptom_", 1:20)

\donttest{
# Perform holdout validation
validation_results <- holdout_validation(sample_data, train_ratio = 0.7)

# Access results
validation_results$without_clusters$summary
validation_results$with_clusters$summary
}

}
