% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MetaNLP.R
\docType{package}
\name{MetaNLP}
\alias{MetaNLP}
\alias{MetaNLP-class}
\title{Natural Language Processing for Meta Analysis}
\usage{
MetaNLP(
  file,
  bounds = c(2, Inf),
  word_length = c(3, Inf),
  language = "english",
  weighting = "frequency",
  ...
)
}
\arguments{
\item{file}{Either the path to the CSV file or a data frame containing the
abstracts}

\item{bounds}{An integer vector of length 2. The first value specifies
the minimum number of appearances a word needs in order to become a column
of the word count matrix; the second value specifies the maximum number.
Defaults to \code{c(2, Inf)}. Note that the bounds are applied to
the potentially weighted entries of the matrix.}

\item{word_length}{An integer vector of length 2. The first value specifies
the minimum number of characters a word needs in order to become a column
of the word count matrix; the second value specifies the maximum number.
Defaults to \code{c(3, Inf)}.}

\item{language}{The language for lemmatization and stemming. Supported
languages are \code{english}, \code{french}, \code{german}, \code{russian} and
\code{spanish}. For non-English languages, make sure that the CSV file
being processed has the correct encoding.}

\item{weighting}{A weighting function for the entries of the document-term matrix.
The default is \code{"frequency"}; other options are \code{"binary"} and
\code{"tf-idf"}.}

\item{...}{Additional arguments passed on to \code{read.csv2}, e.g. when
"," should be used as a separator or when the encoding should be changed.
See \link[utils]{read.table}.}
}
\value{
An object of class \code{MetaNLP}
}
\description{
The \pkg{MetaNLP} package provides methods to quickly transform a
CSV file with titles and abstracts to an R data frame that can be
used for automatic title-abstract screening using machine learning.

A \code{MetaNLP} object is the base class of the package \pkg{MetaNLP}.
It is initialized by passing the path to a CSV file and constructs
a data frame whose column names are the words that occur in the titles
and abstracts and whose cells contain the word frequencies for each
paper.
}
\details{
An object of class \code{MetaNLP} contains a slot \code{data_frame} where
the document-term matrix is stored as a data frame.
The CSV file must have a column \code{ID} to identify each paper, a column
\code{title} with the corresponding titles of the papers and a column
\code{abstract} which contains the abstracts. If the CSV file stores training
data, a column \code{decision} should exist, indicating whether an abstract
is included in the meta-analysis. This column is optional, because
there is no decision for test data yet. Allowed values in this column are
"yes" and "no", or "include" and "exclude". The value "maybe" is also
allowed and is handled as a "yes"/"include".
}
\note{
To ensure correct processing of the data when there are special characters
(e.g. "é" or "ü"), make sure that the CSV file is correctly encoded
as \code{UTF-8}.
The stemming algorithm makes use of the C libstemmer library generated by
Snowball. When German texts are stemmed, umlauts are replaced by their
non-umlaut equivalents, so "ä" becomes "a" etc.
}
\examples{
path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
obj <- MetaNLP(path)

}
\seealso{
Useful links:
\itemize{
  \item \url{https://github.com/imbi-heidelberg/MetaNLP}
  \item Report bugs at \url{https://github.com/imbi-heidelberg/MetaNLP/issues}
}

}
\author{
\strong{Maintainer}: Maximilian Pilz \email{maximilian.pilz@itwm.fraunhofer.de} (\href{https://orcid.org/0000-0002-9685-1613}{ORCID})

Authors:
\itemize{
  \item Nico Bruder \email{brudernico@gmail.com} (\href{https://orcid.org/0009-0004-9522-2075}{ORCID})
  \item Samuel Zimmermann \email{zimmermann@imbi.uni-heidelberg.de} (\href{https://orcid.org/0009-0000-4828-9294}{ORCID})
  \item Johannes Vey \email{vey@imbi.uni-heidelberg.de} (\href{https://orcid.org/0000-0002-2610-9667}{ORCID})
}

Other contributors:
\itemize{
  \item Institute of Medical Biometry - University of Heidelberg [copyright holder]
}

}
