% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prepare_data.R
\name{prepare_data}
\alias{prepare_data}
\title{Prepare Data for GNN Training}
\usage{
prepare_data(
  data,
  outcome_var,
  group_var,
  group_mappings = NULL,
  cols_to_remove = NULL
)
}
\arguments{
\item{data}{A data frame containing the raw data.}

\item{outcome_var}{A string giving the name of the binary outcome column; the values in that column must be coded as 0 or 1.}

\item{group_var}{A string with the column name of the sensitive attribute.}

\item{group_mappings}{Optional named list mapping values in \code{group_var} to numeric codes (e.g., \code{list("Male" = 0, "Female" = 1)}).}

\item{cols_to_remove}{Optional character vector of column names to exclude from the feature matrix (e.g., IDs or highly collinear variables).}
}
\value{
A list containing:
\item{X}{The scaled feature matrix.}
\item{y}{The numeric outcome vector.}
\item{group}{The numeric group vector.}
\item{feature_names}{The names of the features used.}
\item{subject_ids}{A vector of subject IDs, if a \code{subjectid} column exists in \code{data}.}
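\item{group_mappings}{The mapping from values of \code{group_var} to numeric codes (as supplied, or as automatically generated), added as an attribute for downstream use.}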
}
\description{
This function takes a raw data frame, cleans it, defines the outcome and
group variables, and scales the feature matrix. If \code{group_mappings}
is not provided, the mapping is generated automatically from the unique
values (or factor levels) of \code{group_var}.
}
\examples{
my_data <- data.frame(
  subjectid = 1:10,
  remission = sample(0:1, 10, replace = TRUE),
  gender = sample(c("M", "F"), 10, replace = TRUE),
  feature1 = rnorm(10),
  feature2 = rnorm(10)
)

prepared_data <- prepare_data(
  data = my_data,
  outcome_var = "remission",
  group_var = "gender",
  cols_to_remove = c("subjectid")
)
}
