## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered vignette: fold source and output into a
# single block and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(dtaudit)
library(data.table)

## ----data---------------------------------------------------------------------
# Example order lines: eight orders placed by five customers. Product ids 105
# and 106 do not appear in `products`, so joining on product_id leaves those
# orders unmatched.
orders <- data.table(
  order_id = seq_len(8L),
  customer = c(
    "Alice", "Bob", "Alice", "Carol",
    "Bob", "Alice", "Dave", "Eve"
  ),
  product_id = c(101L, 102L, 101L, 103L, 104L, 102L, 105L, 106L),
  amount = c(50, 30, 50, 75, 20, 35, 60, 45)
)

# Example product catalogue: one row per product_id. Product 107 is never
# ordered, so it is unmatched from the catalogue side.
products <- data.table(
  product_id = c(101L, 102L, 103L, 104L, 107L),
  category = c("Electronics", "Books", "Clothing", "Books", "Food"),
  price = c(25, 15, 37.5, 10, 8)
)

## ----validate-pk--------------------------------------------------------------
# Run the primary-key check on each table against its candidate key column.
# Both key columns hold distinct values in the example data above.
# NOTE(review): presumably reports whether the column uniquely identifies
# rows -- confirm against the dtaudit documentation.
validate_primary_keys(orders, "order_id")
validate_primary_keys(products, "product_id")

## ----validate-join------------------------------------------------------------
# Inspect the orders/products join on product_id before actually merging.
# In the example data, ids 105 and 106 exist only in `orders` and id 107 only
# in `products`; product ids repeat in `orders` but not in `products`.
# NOTE(review): the exact contents of the report are defined by dtaudit --
# confirm there.
validate_join(orders, products, by = "product_id")

## ----validate-join-stat-------------------------------------------------------
# Same join check, additionally naming one numeric column per side:
# `amount` from `orders` (stat.x) and `price` from `products` (stat.y).
# NOTE(review): presumably these columns are summarised over matched and
# unmatched rows -- confirm against the dtaudit documentation.
validate_join(orders, products, by = "product_id",
              stat.x = "amount", stat.y = "price")

## ----filter-------------------------------------------------------------------
# Left join on product_id: every order is kept, and orders whose product_id is
# absent from `products` receive NA for category and price.
merged <- merge(
  orders, products,
  by = "product_id",
  all.x = TRUE
)

# Retain the Electronics and Books rows only, reporting the dropped amount.
result <- filter_keep(
  merged,
  category %in% c("Electronics", "Books"),
  stat = amount
)

## ----compare------------------------------------------------------------------
# Compare the original orders table with the merged-and-filtered result.
# `result` carries the extra `category` and `price` columns added by the merge
# and only the rows that survived the category filter.
# NOTE(review): which differences are reported is defined by dtaudit --
# confirm there.
compare_datatables(orders, result)

## ----diagnose-nas-------------------------------------------------------------
# Introduce some NAs to demonstrate.
# The two rows are chosen at random, so seed the RNG first; without a seed a
# different pair of rows is blanked on every build and the rendered output of
# this chunk is not reproducible.
set.seed(42)

# copy() so that the by-reference `:=` below leaves `merged` untouched.
merged_with_na <- copy(merged)

# `category` is a character column, so assign the character-typed NA.
# Note that `merged` already has NA categories for the orders whose product_id
# was not found in `products`; the sampled rows may overlap with those.
merged_with_na[sample(.N, 2), category := NA_character_]

diagnose_nas(merged_with_na)

## ----diagnose-strings---------------------------------------------------------
# Run the string diagnostics on the customer-name column, passed as a plain
# character vector.
# NOTE(review): the specific checks performed are defined by dtaudit --
# confirm there.
diagnose_strings(orders$customer)

## ----date-coverage------------------------------------------------------------
# Four observation dates falling in January, February, April and May 2024;
# March and June contain no observations.
dates <- as.IDate(c("2024-01-15", "2024-02-20", "2024-04-10", "2024-05-05"))
# Check the dates against the window 2024-01-01 to 2024-06-30.
# NOTE(review): presumably reports the periods with no observations; the
# granularity used is not visible here -- confirm against the dtaudit docs.
check_date_coverage(dates, "2024-01-01", "2024-06-30")

## ----summary-table------------------------------------------------------------
# Produce a summary of the orders table.
# NOTE(review): presumably one entry per column; the statistics included are
# defined by dtaudit -- confirm there.
get_summary_table(orders)

## ----audit-clean--------------------------------------------------------------
# Sample firm names with mixed case, differing legal suffixes, a bare "apple"
# alongside "Apple Inc.", and a missing value.
firms <- c("Apple Inc.", "MICROSOFT CORP", "Alphabet LLC", "apple", NA)
# Audit the effect of applying `clean_firm_name` to the vector; the cleaning
# function is passed as an object, not called here.
# NOTE(review): presumably compares values before and after cleaning -- confirm
# against the dtaudit documentation.
audit_clean(firms, clean_firm_name)

