% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Deriv.R
\name{Deriv}
\alias{Deriv}
\alias{drule}
\title{Symbolic differentiation of an expression or function}
\usage{
Deriv(
  f,
  x = if (is.function(f)) NULL else all.vars(if (is.character(f)) parse(text = f) else f),
  env = if (is.function(f)) environment(f) else parent.frame(),
  use.D = FALSE,
  cache.exp = TRUE,
  nderiv = NULL,
  combine = "c",
  drule = Deriv::drule
)
}
\arguments{
\item{f}{An expression or function to be differentiated.
f can be \itemize{
 \item a user defined function: \code{function(x) x**n}
 \item a string: \code{"x**n"}
 \item an expression: \code{expression(x**n)}
 \item a call: \code{call("^", quote(x), quote(n))}
 \item a language: \code{quote(x**n)}
 \item a right hand side of a formula: \code{~ x**n} or \code{y ~ x**n}
}}

\item{x}{An optional character vector with variable name(s) with respect to which
\code{f} must be differentiated. If not provided (i.e. x=NULL), x is
guessed either from \code{names(formals(f))} (if \code{f} is a function)
or from all variables in f in other cases.
To differentiate expressions including components of lists or vectors, i.e. by expressions like
\code{p[1]}, \code{theta[["alpha"]]} or \code{theta$beta}, the vector of
variables \code{x}
must be a named vector. For the cited examples, the argument \code{x} must be given
as follows \code{c(p="1", theta="alpha", theta="beta")}.
Note the repeated name \code{theta} which must be provided for every component
of the list \code{theta} by which a
differentiation is required.}

\item{env}{An environment where the symbols and functions are searched for.
Defaults to \code{parent.frame()} for \code{f} expression and to
\code{environment(f)} if \code{f} is a function. For primitive functions,
it is set by default to \code{.GlobalEnv}.}

\item{use.D}{An optional logical (default FALSE), indicates if base::D()
must be used for differentiation of basic expressions.}

\item{cache.exp}{An optional logical (default TRUE), indicates if
final expression must be optimized with cached sub-expressions.
If enabled, repeated calculations are made only once and their
results stored in cache variables which are then reused.}

\item{nderiv}{An optional integer vector of derivative orders to calculate.
Default NULL value corresponds to one differentiation. If length(nderiv)>1,
the resulting expression is a list where each component corresponds to derivative order
given in nderiv. Value 0 corresponds to the original function or expression, not
differentiated. All values must be non-negative. If the entries in nderiv
are named, their names are used as names in the returned list. Otherwise
the value of nderiv component is used as a name in the resulting list.}

\item{combine}{An optional character scalar, it names a function to combine
partial derivatives. Default value is "c" but other functions can be
used, e.g. "cbind" (cf. Details, NB3), "list" or user defined ones. It must
accept any number of arguments or at least the same number of arguments as
there are items in \code{x}.}

\item{drule}{An optional environment-like object containing derivative rules (cf. Details for syntax rules).}
}
\value{
\itemize{
 \item a function if \code{f} is a function
 \item an expression if \code{f} is an expression
 \item a character string if \code{f} is a character string
 \item a language (usually a so called 'call' but may be also a symbol or just a numeric) for other types of \code{f}
}
}
\description{
Symbolic differentiation of an expression or function
}
\details{
R already contains two differentiation functions: \code{stats::D} and \code{stats::deriv}.
\code{D} does
simple univariate differentiation while  \code{deriv} uses \code{D}
to do multivariate
differentiation.  The output of \code{D} is an expression, whereas the output of
\code{deriv} can be an executable function.

R's existing functions have several limitations.  They can probably be fixed,
but since they are written in C, this would probably require a lot of work.
Limitations include:
\itemize{
 \item The derivatives table can't be modified at runtime, and is only available
in C.
 \item Function cannot substitute function calls, e.g.
\code{f <- function(x, y) x + y; deriv(~f(x, x^2), "x")} is not working.
}

So, here are the advantages of this implementation:

\itemize{
 \item It is entirely written in R, so would be easier to maintain.
 \item Can do multi-variate differentiation.
 \item Can differentiate function calls:
 \itemize{
   \item if the function is in the derivative table, then the chain rule
is applied.  For example, if you declared that the derivative of
\code{sin} is \code{cos}, then it would figure out how to call
 \code{cos} correctly in case of complex argument.
   \item if the function is not in the derivative table (or it is anonymous),
then the function body is substituted in.
   \item these two methods can be mixed.  An entry in the derivative table
need not be self-contained -- you don't need to provide an infinite
chain of derivatives.
 }
 \item It's easy to add custom entries to the derivatives table, e.g.

  \code{drule[["cos"]] <- alist(x=-sin(x))}

  The chain rule will be automatically applied if needed. In their custom rules,
  users should avoid using variable names like \code{.e1}, \code{.e2} etc. which
  can be confounded with those automatically created by \code{Deriv} for code caching
  purposes.
 \item The output is an executable function, which makes it suitable
     for use in optimization problems.
 \item Compound functions (i.e. piece-wise functions based on if-else operator) can
     be differentiated (cf. examples section).
 \item in case of multiple derivatives (e.g. gradient and hessian calculation),
     caching can save computations for both
 \item Starting from v4.0, some matrix calculus operations are possible (contribution of Andreas Rappold). See an example hereafter for differentiation of the inverse of a 2x2 matrix whose elements depend on the variable of differentiation \code{x}.
}

Two environments \code{drule} and \code{simplifications} are
exported in the package's NAMESPACE.
As their names indicate, they contain tables of derivative and
simplification rules.
To see the list of defined rules do \code{ls(drule)}.
To add your own derivative rule for a function called say \code{sinpi(x)} calculating sin(pi*x), do \code{drule[["sinpi"]] <- alist(x=pi*cospi(x))}.
Here, "x" stands for the first and unique argument in \code{sinpi()} definition. For a function that might have more than one argument,
e.g. \code{log(x, base=exp(1))}, the drule entry must be a list with a named rule
per argument. See \code{drule$log} for an example to follow.
After adding \code{sinpi} you can differentiate expressions like
\code{Deriv(~ sinpi(x^2), "x")}. The chain rule will automatically apply.

Starting from v4.0, user can benefit from a syntax \code{.d_X} in the rule writing.
Here \code{X} must be replaced by an argument name (cf. \code{drule[["solve"]]} for
an example). A use of this syntax leads to a replacement of this place-holder by a
derivative of the function (chain rule is automatically integrated) by the named argument.
\cr
Another v4.0 novelty in rule's syntax is a possible use of optional parameter
\code{`_missing`} which can be set to TRUE or FALSE (default) to indicate how
to treat missing arguments. By default, i.e. in absence of this parameter
or if it is set to FALSE, missing arguments are replaced by their default values.
If \code{`_missing`=TRUE} is specified in a rule, the missing arguments
will be left missing in the derivative. See \code{drule[["solve"]]} for an example.

NB. In \code{abs()} and \code{sign()} function, singularity treatment
    at point 0 is left to user's care.
    For example, if you need NA at singular points, you can define the following:
    \code{drule[["abs"]] <- alist(x=ifelse(x==0, NA, sign(x)))}
    \code{drule[["sign"]] <- alist(x=ifelse(x==0, NA, 0))}

NB2. In Bessel functions, derivatives are calculated only by the first argument,
     not by the \code{nu} argument which is supposed to be constant.

NB3. There is a side effect with vector length. E.g. in
     \code{Deriv(~a+b*x, c("a", "b"))} the result is \code{c(a = 1, b = x)}.
     To avoid the difference in lengths of a and b components (when x is a vector),
     one can use an optional parameter \code{combine}
     \code{Deriv(~a+b*x, c("a", "b"), combine="cbind")} which gives
     \code{cbind(a = 1, b = x)} producing a two column matrix which is
     probably the desired result here.
     \cr Another example illustrating a side effect is a plain linear
     regression case and its Hessian:
     \code{Deriv(~sum((a+b*x - y)**2), c("a", "b"), n=c(hessian=2))}
     producing just a constant \code{2} for double differentiation by \code{a}
     instead of expected result \code{2*length(x)}. It comes from a simplification of
     an expression \code{sum(2)} where the constant is not repeated as many times
     as length(x) would require it. Here, using the same trick
     with \code{combine="cbind"} would not help as all 4 derivatives are just scalars.
     Instead, one should modify the previous call to explicitly use a constant vector
     of appropriate length:
     \code{Deriv(~sum((rep(a, length(x))+b*x - y)**2), c("a", "b"), n=2)}

NB4. Differentiation of \code{*apply()} family (available starting from v4.1) is
     done only on the body of the \code{FUN} argument. It implies that this
     body must use the same variable names as in argument \code{x} and they must not
     appear in \code{FUN}'s arguments (cf. GMM example).

NB5. Expressions are differentiated as scalar ones. However in some cases, obtained result
     remains valid if the variable of differentiation is a vector. This is just a coincidence.
     If you need to differentiate by vectors, you can try to write your own differentiation rule.
     For example, derivative of \code{sum(x)} where \code{x} is a vector can be done as:
     \code{vsum=function(x) sum(x)}
     \code{drule[["vsum"]] <- alist(x=rep_len(1, length(x)))} # drule is exported from Deriv namespace
     \code{Deriv(~vsum(a*x), "x", drule=drule)}
     \code{# a * rep_len(1, length(a * x))}

NB6. Since v4.2, it is possible to differentiate by named components of lists and vectors
     that are used in \code{with(data, expr)} expressions (cf. "with()" example).
     The names of argument \code{x} ("theta" in an example above) must be used
	directly, e.g. \code{with(theta, ...)} or \code{with(as.list(theta), ...)}.
	Otherwise, the \code{expr} of \code{with()} will be differentiated as plain code.
}
\examples{

\dontrun{f <- function(x) x^2}
\dontrun{Deriv(f)}
# function (x)
# 2 * x

\dontrun{f <- function(x, y) sin(x) * cos(y)}
\dontrun{Deriv(f)}
# function (x, y)
# c(x = cos(x) * cos(y), y = -(sin(x) * sin(y)))

\dontrun{f_ <- Deriv(f)}
\dontrun{f_(3, 4)}
#              x         y
# [1,] 0.6471023 0.1068000

\dontrun{Deriv(~ f(x, y^2), "y")}
# -(2 * (y * sin(x) * sin(y^2)))

\dontrun{Deriv(quote(f(x, y^2)), c("x", "y"), cache.exp=FALSE)}
# c(x = cos(x) * cos(y^2), y = -(2 * (y * sin(x) * sin(y^2))))

\dontrun{Deriv(expression(sin(x^2) * y), "x")}
# expression(2*(x*y*cos(x^2)))

Deriv("sin(x^2) * y", "x") # differentiate only by x
"2 * (x * y * cos(x^2))"

Deriv("sin(x^2) * y", cache.exp=FALSE) # differentiate by all variables (here by x and y)
"c(x = 2 * (x * y * cos(x^2)), y = sin(x^2))"

# Compound function example (here abs(x) smoothed near 0)
fc <- function(x, h=0.1) if (abs(x) < h) 0.5*h*(x/h)**2 else abs(x)-0.5*h
Deriv("fc(x)", "x", cache.exp=FALSE)
"if (abs(x) < h) x/h else sign(x)"

# Example of a first argument that cannot be evaluated in the current environment:
\dontrun{
  suppressWarnings(rm("xx", "yy"))
  Deriv(xx^2+yy^2)
}
# c(xx = 2 * xx, yy = 2 * yy)

# Automatic differentiation (AD), note intermediate variable 'd' assignment
\dontrun{Deriv(~{d <- ((x-m)/s)^2; exp(-0.5*d)}, "x", cache.exp=FALSE)}
#{
#   d <- ((x - m)/s)^2
#   .d_x <- 2 * ((x - m)/s^2)
#   -(0.5 * (.d_x * exp(-(0.5 * d))))
#}

# Custom differentiation rule
\dontrun{
  myfun <- function(x, y=TRUE) NULL # do something useful
  dmyfun <- function(x, y=TRUE) NULL # myfun derivative by x.
  drule[["myfun"]] <- alist(x=dmyfun(x, y), y=NULL) # y is just a logical => no derivative
  Deriv(~myfun(z^2, FALSE), "z", drule=drule)
  # 2 * (z * dmyfun(z^2, FALSE))
}

# Differentiation by list components
\dontrun{
  theta <- list(m=0.1, sd=2.)
  x <- names(theta)
  names(x)=rep("theta", length(theta))
  Deriv(~exp(-(x-theta$m)**2/(2*theta$sd)), x, cache.exp=FALSE)
# c(theta_m = exp(-((x - theta$m)^2/(2 * theta$sd))) *
#  (x - theta$m)/theta$sd, theta_sd = 2 * (exp(-((x - theta$m)^2/
#  (2 * theta$sd))) * (x - theta$m)^2/(2 * theta$sd)^2))
}

# Differentiation by list components used in "with()" expression (since v4.2)
# Compare with the previous example.
\dontrun{
  theta <- list(m=0.1, sd=2.)
  x <- names(theta)
  names(x)=rep("theta", length(theta))
  Deriv(~with(theta, exp(-(x-m)**2/(2*sd))), x, cache.exp=FALSE)
# c(theta_m = with(theta, exp(-((x - m)^2/(2 * sd))) * (x - m)/sd), 
#     theta_sd = with(theta, 2 * (exp(-((x - m)^2/(2 * sd))) * 
#         (x - m)^2/(2 * sd)^2)))
}
# Differentiation in matrix calculus
\dontrun{
 Deriv(~solve(matrix(c(1, x, x**2, x**3), nrow=2, ncol=2)))
}

# Two component Gaussian mixture model (GMM) example
\dontrun{
# define GMM probability density function -> p(x, ...)
ncomp=2
a=runif(ncomp)
a=a/sum(a) # amplitude or weight of each component
m=rnorm(ncomp) # mean
s=runif(ncomp) # sd
# two column matrix of probabilities: one row per x value, one column per component
pn=function(x, a, m, s, log=FALSE) {
  n=length(a)
  structure(vapply(seq(n), function(i) a[i]*dnorm(x, m[i], s[i], log),
    double(length(x))), dim=c(length(x), n))
}
p=function(x, a, m, s) rowSums(pn(x, a, m, s)) # overall probability
dp=Deriv(p, "x")
# plot density and its derivative
xp=seq(min(m-2*s), max(m+2*s), length.out=200)
matplot(xp, cbind(p(xp, a, m, s), dp(xp, a, m, s)),
   xlab="x", ylab="p, dp/dx", type="l", main="Two component GMM")
}
}
\author{
Andrew Clausen (original version) and Serguei Sokol (current version and maintainer)
}
\concept{symbolic differentiation}
