% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sim.R
\name{sim}
\alias{sim}
\title{Simulate data}
\usage{
sim(psi, xi)
}
\arguments{
\item{psi}{A matrix of item parameters. Rows correspond to items and columns
to parameters.}

\item{xi}{A matrix of person parameters. Rows correspond to persons and
columns to parameters.}
}
\value{
A list is returned. Possible elements include:
\item{x}{A matrix of item scores.}
\item{d}{A matrix of item distractors.}
\item{r}{A matrix of item responses.}
\item{y}{A matrix of item log response times.}
}
\description{
Simulate data using item response theory (IRT) and response time
(RT) models.
}
\section{Models for Item Scores}{
The \strong{Rasch}, \strong{2PL}, and \strong{3PL models} (Birnbaum, 1968; Rasch, 1960),
where the 2PL model sets \eqn{c_i = 0} and the Rasch model additionally sets
\eqn{a_i = 1}, are given by
\deqn{P(X_{vi} = 1 | \theta_v, a_i, b_i, c_i) =
c_i + \frac{1 - c_i}{1 + \exp\{-a_i(\theta_v - b_i)\}}.}
\itemize{
\item \code{psi} must contain columns named \code{"a"}, \code{"b"}, and \code{"c"} for the item
discrimination, difficulty, and pseudo-guessing parameters, respectively.
\item \code{xi} must contain a column named \code{"theta"} for the person ability
parameters.
}

The \strong{partial credit model} (PCM; Masters, 1982) and the \strong{generalized
partial credit model} (GPCM; Muraki, 1992), where the PCM sets \eqn{a_i = 1} and
\eqn{m_i} is the maximum score on item \eqn{i}, are given by
\deqn{P(X_{vi} = j | \theta_v, a_i, \boldsymbol{c_i}) =
\frac{\exp\{\sum_{k=0}^j a_i(\theta_v - c_{ik})\}}
{\sum_{l=0}^{m_i} \exp\{\sum_{k=0}^l a_i(\theta_v - c_{ik})\}}.}
\itemize{
\item \code{psi} must contain columns named \code{"a"} for the item discrimination
parameter and \code{"c0"}, \code{"c1"}, \code{...}, for the item category parameters.
\item \code{xi} must contain a column named \code{"theta"} for the person ability
parameters.
}

The \strong{graded response model} (GRM; Samejima, 1969) is given by
\deqn{P(X_{vi} = j | \theta_v, a_i, \boldsymbol{b_i}) =
P(X_{vi} \ge j | \theta_v, a_i, \boldsymbol{b_i}) -
P(X_{vi} \ge j + 1 | \theta_v, a_i, \boldsymbol{b_i}),}
where
\deqn{P(X_{vi} \ge j | \theta_v, a_i, \boldsymbol{b_i}) = \begin{cases}
1 &\text{if } j = 0, \\
\frac{1}{1 + \exp\{-a_i(\theta_v - b_{ij})\}}
&\text{if } 1 \le j \le m_i, \\
0 &\text{if } j = m_i + 1.
\end{cases}}
\itemize{
\item \code{psi} must contain columns named \code{"a"} for the item discrimination
parameter and \code{"b1"}, \code{"b2"}, \code{...}, for the item location parameters
listed in increasing order.
\item \code{xi} must contain a column named \code{"theta"} for the person ability
parameters.
}
}

\section{Models for Item Distractors}{
The \strong{nested logit model} (NLM; Bolt et al., 2012) is given by
\deqn{P(D_{vi} = j | \theta_v, \eta_v,
a_i, b_i, c_i, \boldsymbol{\lambda_i}, \boldsymbol{\zeta_i}) =
[1 - P(X_{vi} = 1 | \theta_v, a_i, b_i, c_i)] \times
P(D_{vi} = j | X_{vi} = 0, \eta_v,
\boldsymbol{\lambda_i}, \boldsymbol{\zeta_i}),}
where
\deqn{P(D_{vi} = j | X_{vi} = 0, \eta_v,
\boldsymbol{\lambda_i}, \boldsymbol{\zeta_i}) =
\frac{\exp(\lambda_{ij} \eta_v + \zeta_{ij})}
{\sum_{k=1}^{m_i-1} \exp(\lambda_{ik} \eta_v + \zeta_{ik})}.}
\itemize{
\item \code{psi} must contain columns named \code{"a"}, \code{"b"}, and \code{"c"} for the item
discrimination, difficulty, and pseudo-guessing parameters, respectively,
\code{"lambda1"}, \code{"lambda2"}, \code{...}, for the item slope parameters, and
\code{"zeta1"}, \code{"zeta2"}, \code{...}, for the item intercept parameters.
\item \code{xi} must contain columns named \code{"theta"} and \code{"eta"} for the person
parameters that govern response correctness and distractor selection,
respectively.
}
}

\section{Models for Item Responses}{
The \strong{nominal response model} (NRM; Bock, 1972) is given by
\deqn{P(R_{vi} = j | \eta_v,
\boldsymbol{\lambda_i}, \boldsymbol{\zeta_i}) =
\frac{\exp(\lambda_{ij} \eta_v + \zeta_{ij})}
{\sum_{k=1}^{m_i} \exp(\lambda_{ik} \eta_v + \zeta_{ik})}.}
\itemize{
\item \code{psi} must contain columns named \code{"lambda1"}, \code{"lambda2"}, \code{...}, for the
item slope parameters and \code{"zeta1"}, \code{"zeta2"}, \code{...}, for the item
intercept parameters. If there is a correct response category, its
parameters should be listed last.
\item \code{xi} must contain a column named \code{"eta"} for the person parameters that
govern response selection.
}
}

\section{Models for Item Log Response Times}{
The \strong{lognormal model} (van der Linden, 2006), expressed as the normal
density of the log response time \eqn{Y_{vi}}, is given by
\deqn{f(Y_{vi} | \tau_v, \alpha_i, \beta_i) =
\frac{\alpha_i}{\sqrt{2 \pi}}
\exp\{-\frac{1}{2}[\alpha_i(Y_{vi} - (\beta_i - \tau_v))]^2\}.}
\itemize{
\item \code{psi} must contain columns named \code{"alpha"} and \code{"beta"} for the item time
discrimination and time intensity parameters, respectively.
\item \code{xi} must contain a column named \code{"tau"} for the person speed parameters.
}
}

\examples{
# Setup for Examples 1 to 5 -------------------------------------------------

# Settings shared by every example below. All examples draw from the single
# random number stream seeded here, so running them in a different order (or
# skipping one) changes the simulated values.
set.seed(0)     # seed for reproducibility
N <- 500        # number of persons (rows of xi)
n <- 40         # number of items (rows of psi)

# Example 1: 3PL Model and Lognormal Model ----------------------------------

# Generate person parameters
# mu is named so that xi carries the theta and tau column names required for
# the 3PL and lognormal models. Sigma sets var(theta) = 1.00, var(tau) = 0.25,
# and cov(theta, tau) = 0.25, that is, a correlation of 0.50.
xi <- MASS::mvrnorm(
  N,
  mu = c(theta = 0.00, tau = 0.00),
  Sigma = matrix(c(1.00, 0.25, 0.25, 0.25), ncol = 2)
)

# Generate item parameters
# b and beta are NA placeholders here; both columns are filled in jointly by
# the next statement.
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  b = NA,
  c = runif(n, min = 0.05, max = 0.30),
  alpha = runif(n, min = 1.50, max = 2.50),
  beta = NA
)

# Generate positively correlated difficulty and time intensity parameters
# (covariance 0.20 with variances 1.00 and 0.15, a correlation of about 0.52)
psi[, c("b", "beta")] <- MASS::mvrnorm(
  n,
  mu = c(b = 0.00, beta = 3.50),
  Sigma = matrix(c(1.00, 0.20, 0.20, 0.15), ncol = 2)
)

# Simulate item scores and log response times
# psi holds both 3PL columns (a, b, c) and lognormal columns (alpha, beta), so
# the returned list is read for item scores (x) and log response times (y).
dat <- sim(psi, xi)
x <- dat$x
y <- dat$y

# Example 2: Generalized Partial Credit Model -------------------------------

# Generate person parameters
# A one-column matrix; cbind supplies the theta column name.
xi <- cbind(theta = rnorm(N, mean = 0.00, sd = 1.00))

# Generate item parameters
# c0 is held at 0 for every item, while c1 to c3 are centered at -1, 0, and 1.
# With category parameters c0 to c3, the model above gives scores 0 to 3.
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  c0 = 0,
  c1 = rnorm(n, mean = -1.00, sd = 0.50),
  c2 = rnorm(n, mean = 0.00, sd = 0.50),
  c3 = rnorm(n, mean = 1.00, sd = 0.50)
)

# Simulate item scores
x <- sim(psi, xi)$x

# Example 3: Graded Response Model ------------------------------------------

# Generate person parameters
# A one-column matrix; cbind supplies the theta column name.
xi <- cbind(theta = rnorm(N, mean = 0.00, sd = 1.00))

# Generate item parameters
# b1 to b3 are drawn independently, so within an item they are not yet
# guaranteed to be in increasing order.
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  b1 = rnorm(n, mean = -1.00, sd = 0.50),
  b2 = rnorm(n, mean = 0.00, sd = 0.50),
  b3 = rnorm(n, mean = 1.00, sd = 0.50)
)

# Sort item location parameters in increasing order
# apply over rows returns one sorted item per column, so t() is needed to put
# items back on the rows before assigning into psi.
psi[, paste0("b", 1:3)] <- t(apply(psi[, paste0("b", 1:3)], 1, sort))

# Simulate item scores
x <- sim(psi, xi)$x

# Example 4: Nested Logit Model ---------------------------------------------

# Generate person parameters
# mu is named so that xi carries the theta and eta column names. Both
# variances are 1.00 and the covariance is 0.80, so theta and eta have a
# correlation of 0.80.
xi <- MASS::mvrnorm(
  N,
  mu = c(theta = 0.00, eta = 0.00),
  Sigma = matrix(c(1.00, 0.80, 0.80, 1.00), ncol = 2)
)

# Generate item parameters
# a, b, and c govern response correctness; each lambda and zeta pair gives the
# slope and intercept of one distractor, so every item has three distractors.
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  b = rnorm(n, mean = 0.00, sd = 1.00),
  c = runif(n, min = 0.05, max = 0.30),
  lambda1 = rnorm(n, mean = 0.00, sd = 1.00),
  lambda2 = rnorm(n, mean = 0.00, sd = 1.00),
  lambda3 = rnorm(n, mean = 0.00, sd = 1.00),
  zeta1 = rnorm(n, mean = 0.00, sd = 1.00),
  zeta2 = rnorm(n, mean = 0.00, sd = 1.00),
  zeta3 = rnorm(n, mean = 0.00, sd = 1.00)
)

# Simulate item scores and distractors
dat <- sim(psi, xi)
x <- dat$x
d <- dat$d

# Example 5: Nominal Response Model -----------------------------------------

# Generate person parameters
# A one-column matrix; cbind supplies the eta column name.
xi <- cbind(eta = rnorm(N, mean = 0.00, sd = 1.00))

# Generate item parameters
# Four response categories per item. Categories 1 to 3 share a mean of -0.50
# for slope and intercept, while category 4 has a mean of 1.50; it presumably
# plays the role of the correct response, which is to be listed last.
psi <- cbind(
  lambda1 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda2 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda3 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda4 = rnorm(n, mean = 1.50, sd = 0.50),
  zeta1 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta2 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta3 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta4 = rnorm(n, mean = 1.50, sd = 0.50)
)

# Simulate item responses
r <- sim(psi, xi)$r
}
\references{
Birnbaum, A. (1968). Some latent trait models and their use in inferring an
examinee's ability. In F. M. Lord & M. R. Novick (Eds.), \emph{Statistical
theories of mental test scores} (pp. 397--479). Addison-Wesley.

Bock, R. D. (1972). Estimating item parameters and latent ability when
responses are scored in two or more nominal categories. \emph{Psychometrika},
\emph{37}(1), 29--51.

Bolt, D. M., Wollack, J. A., & Suh, Y. (2012). Application of a
multidimensional nested logit model to multiple-choice test items.
\emph{Psychometrika}, \emph{77}(2), 339--357.

Masters, G. N. (1982). A Rasch model for partial credit scoring.
\emph{Psychometrika}, \emph{47}(2), 149--174.

Muraki, E. (1992). A generalized partial credit model: Application of an EM
algorithm. \emph{Applied Psychological Measurement}, \emph{16}(2), 159--176.

Rasch, G. (1960). \emph{Probabilistic models for some intelligence and attainment
tests}. Danish Institute for Educational Research.

Samejima, F. (1969). Estimation of latent ability using a response pattern of
graded scores. \emph{Psychometrika}, \emph{34}(S1), 1--97.

van der Linden, W. J. (2006). A lognormal model for response times on test
items. \emph{Journal of Educational and Behavioral Statistics}, \emph{31}(2), 181--204.
}
