% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/engine.R
\name{crossfit_multi}
\alias{crossfit_multi}
\title{Cross-fitting for multiple methods}
\usage{
crossfit_multi(
  data,
  methods,
  fold_split = function(data, K) sample(rep_len(1:K, nrow(data))),
  seed = NULL,
  aggregate_panels = identity,
  aggregate_repeats = identity,
  max_fail = Inf,
  verbose = FALSE
)
}
\arguments{
\item{data}{Data frame or matrix of size \eqn{n \times p}{n x p} containing
the observations.}

\item{methods}{A (named) list of method specifications, typically
created with \code{\link{create_method}}.}

\item{fold_split}{A function of the form \code{function(data, K)}
returning a vector of length \code{nrow(data)} with integer fold
labels in \code{1:K}. It must assign at least one observation to
each fold.}

\item{seed}{Integer base random seed used for the K-fold splits, or
\code{NULL} (the default). When an integer is supplied, each
repetition uses \code{seed + rep_id - 1}.}

\item{aggregate_panels}{Function used as the \emph{default} aggregator
over panels (folds) for each method. It is applied to the list of
per-panel values. Methods can override this via their own
\code{aggregate_panels}.}

\item{aggregate_repeats}{Function used as the \emph{default}
aggregator over repetitions for each method. It is applied to the
list of per-repetition aggregated values. Methods can override this
via their own \code{aggregate_repeats}.}

\item{max_fail}{Non-negative integer or \code{Inf} controlling how
many repetitions a method is allowed to fail before being disabled.
Structural model failures and panel-level errors both count toward
this limit.}

\item{verbose}{Logical; if \code{TRUE}, prints a compact status line
per repetition.}
}
\value{
A list with components:
\describe{
\item{\code{estimates}}{Named list of final estimates per method
(after aggregating over panels and repetitions).}
\item{\code{per_method}}{For each method, a list with
\code{values} (per-repetition aggregated results) and
\code{errors} (error traces).}
\item{\code{repeats_done}}{Number of repetitions successfully
completed for each method.}
\item{\code{K}}{Number of folds used in the plan.}
\item{\code{K_required}}{Per-method minimal required K based on
their dependency structure.}
\item{\code{methods}}{The validated and normalized method
specifications.}
\item{\code{plan}}{The cross-fitting plan produced by
\code{build_instances()}.}
}
}
\description{
Runs cross-fitting for one or more methods defined via
\code{\link{create_method}} and \code{\link{create_nuisance}}. This
is the main engine that:
\itemize{
\item validates and normalizes method specifications,
\item builds the global instance graph and fold geometry,
\item repeatedly draws K-fold splits and evaluates all active
methods,
\item aggregates results across panels and repetitions.
}
}
\details{
Each method can operate in either \code{mode = "estimate"} (target
returns numeric values) or \code{mode = "predict"} (target returns a
prediction function). Cross-fitting ensures that nuisance models are
always trained on folds disjoint from the folds on which their
predictions are used in the target.
}
\examples{
# Simulate a small regression data set. The covariate is drawn before the
# noise so the random number stream is consumed in a fixed order.
set.seed(1)
n_obs <- 100
covariate <- rnorm(n_obs)
noise <- rnorm(n_obs)
sim_data <- data.frame(x = covariate, y = covariate + noise)

# Nuisance shared by both methods: linear regression of y on x, E[Y | X]
fit_y_on_x <- function(data, ...) {
  lm(y ~ x, data = data)
}
predict_y <- function(model, data, ...) {
  predict(model, newdata = data)
}
outcome_reg <- create_nuisance(fit = fit_y_on_x, predict = predict_y)

# Target of method 1: mean squared error of the nuisance predictor
target_mse <- function(data, nuis_y, ...) {
  residual <- data$y - nuis_y
  mean(residual^2)
}

# Target of method 2: mean fitted value
target_mean <- function(data, nuis_y, ...) {
  mean(nuis_y)
}

# The two method specifications differ only in their target and in the
# fold allocation scheme.
method_mse <- create_method(
  target = target_mse,
  list_nuisance = list(nuis_y = outcome_reg),
  folds = 2,
  repeats = 2,
  eval_fold = 1L,
  mode = "estimate",
  fold_allocation = "independence"
)

method_mean <- create_method(
  target = target_mean,
  list_nuisance = list(nuis_y = outcome_reg),
  folds = 2,
  repeats = 2,
  eval_fold = 1L,
  mode = "estimate",
  fold_allocation = "overlap"
)

# Cross-fit both methods in one call, using mean_estimate as the aggregator
# over panels and over repetitions.
fit_multi <- crossfit_multi(
  data = sim_data,
  methods = list(mse = method_mse, mean = method_mean),
  aggregate_panels = mean_estimate,
  aggregate_repeats = mean_estimate
)

# Final estimates per method
fit_multi$estimates
}
