% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate_compare_report.R
\name{generate_compare_report}
\alias{generate_compare_report}
\title{Compare DEV vs VAL datasets (PROC COMPARE-style) with robust file detection}
\usage{
generate_compare_report(
  domain,
  dev_dir,
  val_dir,
  by_vars = c("STUDYID", "USUBJID"),
  vars_to_check = NULL,
  report_dir = NULL,
  prefix_val = "v_",
  max_print = 50,
  write_csv = FALSE,
  run_comparedf = TRUE,
  filter_expr = NULL,
  study_id = NULL,
  author = NULL
)
}
\arguments{
\item{domain}{Character scalar domain name (e.g., \code{"adsl"}, \code{"adae"}, \code{"rt-ae-sum"}).
Matching is case-insensitive.}

\item{dev_dir}{DEV dataset directory path.}

\item{val_dir}{VAL dataset directory path.}

\item{by_vars}{Character vector of key variables used to match records
(e.g., \code{c("STUDYID","USUBJID")} or \code{c("STUDYID","USUBJID","AESEQ")}).}

\item{vars_to_check}{Optional character vector of variables to compare.
If \code{NULL}, all variables common to both datasets are compared
(key variables are handled as defined by the implementation).}

\item{report_dir}{Output directory for report files. Created if missing.}

\item{prefix_val}{Character prefix for validation datasets (default \code{"v_"}).
The resolver also supports variants like \verb{v-} and \code{v} (no separator).}

\item{max_print}{Maximum number of lines printed in the \code{.lst} report for summaries/diffs.}

\item{write_csv}{Logical; if \code{TRUE}, writes PROC COMPARE-style CSV to \code{report_dir} as
\verb{compare_<domain>.csv}.}

\item{run_comparedf}{Logical; if \code{TRUE}, uses \code{arsenal::comparedf()} to generate a \code{.lst} report.}

\item{filter_expr}{Optional filter expression \strong{string} evaluated within each dataset
(e.g., \code{"SAFFL == 'Y' & TRTEMFL == 'Y'"}).}

\item{study_id}{Optional study identifier included in the \code{.lst} header.}

\item{author}{Optional author name included in the \code{.lst} header.}
}
\value{
Invisibly returns a list with:
\itemize{
\item \code{only_in_dev}: rows present only in DEV (set-difference result)
\item \code{only_in_val}: rows present only in VAL (set-difference result)
\item \code{comparedf}: \code{arsenal::comparedf} object (or \code{NULL} if \code{run_comparedf = FALSE})
}
}
\description{
\code{generate_compare_report()} compares a \strong{developer (DEV)} dataset and a \strong{validation (VAL)}
dataset for a given \code{domain} and produces outputs similar to SAS \verb{PROC COMPARE}.

This function is intended for ADaM/SDTM/TFL validation workflows and supports:
\itemize{
\item \strong{Directory-driven inputs}: DEV and VAL locations are provided via \code{dev_dir} and \code{val_dir}.
\item \strong{Case-insensitive domain matching}: \code{domain = "ADAE"} will match files like \verb{adae.*}.
\item \strong{VAL prefix flexibility}: resolves \code{prefix_val} variants such as \code{v_}, \verb{v-}, and \code{v} (no separator).
\item \strong{Automatic extension detection} for DEV and VAL files: \code{.sas7bdat}, \code{.xpt}, \code{.csv}, \code{.rds}.
\item \strong{Optional filtering} using \code{filter_expr} prior to comparison.
\item \strong{Optional PROC COMPARE-style CSV} output with \code{BASE}, \code{COMPARE}, and \code{DIF} triplets.
\item \strong{Optional LST-like report} using \code{arsenal::comparedf()} for summarized differences.
}
}
\details{
The function looks for exactly one matching domain file per directory:
\itemize{
\item DEV: \verb{<domain>.<ext>}
\item VAL: \verb{<prefix><domain>.<ext>} where \verb{<prefix>} is \code{prefix_val} plus common variants
supporting underscore/hyphen/no-separator forms (e.g., \code{v_}, \verb{v-}, \code{v}).
}

Supported extensions (priority order) are:
\code{sas7bdat}, \code{xpt}, \code{csv}, \code{rds}.

If multiple matches exist for the same domain in a directory (e.g., \code{adae.csv} and \code{adae.xpt}),
the function stops with an \strong{ambiguous match} error to prevent accidental comparisons.

\strong{PROC COMPARE-style CSV behavior}

When \code{write_csv = TRUE}, the output includes:
\itemize{
\item \verb{_TYPE_} with values \code{BASE}, \code{COMPARE}, \code{DIF}
\item \verb{_OBS_} sequence within each BY key
\item For numeric variables, \code{DIF = DEV - VAL}
\item For Date variables, \code{DIF} is the \strong{integer day difference} (\code{as.integer(DEV - VAL)})
\item For POSIXct variables, \code{DIF} is the \strong{difference in seconds} (\code{as.numeric(DEV - VAL)})
\item For other types, \code{DIF} is a character mask (\code{X} indicates difference)
}
}
\examples{
# Build an isolated workspace under the session temporary directory.
td <- tempdir()
dev_dir <- file.path(td, "dev")
val_dir <- file.path(td, "val")
rpt_dir <- file.path(td, "rpt")
dir.create(dev_dir, showWarnings = FALSE)
dir.create(val_dir, showWarnings = FALSE)
dir.create(rpt_dir, showWarnings = FALSE)

# Toy DEV dataset, plus a VAL copy that differs in a single AETERM value.
dev <- data.frame(
  STUDYID = "STDY1",
  USUBJID = c("01", "02"),
  AESEQ   = c(1, 1),
  AETERM  = c("HEADACHE", "NAUSEA"),
  stringsAsFactors = FALSE
)
val <- dev
val$AETERM[2] <- "VOMITING"

# Write both datasets as CSV; the VAL file uses the hyphenated prefix variant.
utils::write.csv(dev, file.path(dev_dir, "adae.csv"), row.names = FALSE)
utils::write.csv(val, file.path(val_dir, "v-adae.csv"), row.names = FALSE)

# Example 1: compare on three key variables and write the PROC COMPARE-style CSV.
generate_compare_report(
  domain        = "adae",
  dev_dir       = dev_dir,
  val_dir       = val_dir,
  by_vars       = c("STUDYID","USUBJID","AESEQ"),
  report_dir    = rpt_dir,
  write_csv     = TRUE,
  run_comparedf = FALSE
)

# Example 2: same comparison with an upper-case domain name and no CSV output.
generate_compare_report(
  domain        = "ADAE",
  dev_dir       = dev_dir,
  val_dir       = val_dir,
  by_vars       = c("STUDYID","USUBJID","AESEQ"),
  report_dir    = rpt_dir,
  write_csv     = FALSE,
  run_comparedf = FALSE
)

# Example 3: supply a filter expression string that is applied before comparing.
generate_compare_report(
  domain        = "adae",
  dev_dir       = dev_dir,
  val_dir       = val_dir,
  by_vars       = c("STUDYID","USUBJID","AESEQ"),
  report_dir    = rpt_dir,
  filter_expr   = "USUBJID == '02'",
  write_csv     = TRUE,
  run_comparedf = FALSE
)

# Remove the directories and files created by this example.
unlink(c(dev_dir, val_dir, rpt_dir), recursive = TRUE)
}
\seealso{
\code{\link[arsenal]{comparedf}}, \code{\link[data.table]{fsetdiff}},
\code{\link[data.table]{fintersect}}
}
