

#' Posterior grammar-type probabilities for each sign position
#'
#' For every distinct non-missing position in `sg`, combines the prior type
#' probabilities with dictionary counts for the sign at that position:
#'
#'   theta[k] = (alpha0 * prior[k] + m[k]) / (alpha0 + sum(m))
#'
#' where `m[k] = n[k] * x[k]`, `n[k]` is the summed dictionary count of type
#' `k` for the sign, and `x[k]` is `1 / sentence_prob` for verb-like types
#' (the type `"V"`, or an operator type whose text after the last arrow,
#' `"->"` or U+2192, is `"V"`) and `1` for all other types.
#'
#' @param sg Data frame with columns `position`, `sign_name` and `cuneiform`.
#'   The first row of each position supplies its sign name and cuneiform.
#' @param prior Named numeric vector of prior probabilities, one per grammar
#'   type, carrying a `"sentence_prob"` attribute.
#' @param dic Dictionary data frame with columns `row_type`, `sign_name` and
#'   `type`, and optionally `count`. Only rows with `row_type == "trans."` and
#'   a non-empty type that occurs in `names(prior)` are counted. A missing
#'   `count` value (or a missing `count` column) counts as 1.
#' @param alpha0 Single non-negative number: the weight given to the prior.
#'
#' @return A data frame with one row per position and grammar type, ordered
#'   by position and then by the order of `names(prior)`, with columns
#'   `position`, `sign_name`, `cuneiform`, `type`, `prob` and `n` (the raw
#'   count, as integer). A zero-row data frame with the same columns is
#'   returned when there are no positions or no grammar types.
grammar_probs <- function(sg, prior, dic, alpha0 = 1) {

  if (!is.numeric(alpha0) || length(alpha0) != 1L || is.na(alpha0)) {
    stop("alpha0 must be a single non-missing number.")
  }
  if (alpha0 < 0) stop("alpha0 must be >= 0.")

  sentence_prob <- attr(prior, "sentence_prob")
  if (is.null(sentence_prob)) {
    stop("prior must have a 'sentence_prob' attribute (from prior_probs()).")
  }

  # --- Grammar types from the prior ---
  all_types <- names(prior)
  K <- length(all_types)

  # --- Identify verb-like types for correction ---
  is_verb <- vapply(all_types, function(type_str) {
    if (type_str == "V") return(TRUE)
    is_op <- grepl("\u2192", type_str) || grepl("->", type_str, fixed = TRUE)
    if (!is_op) return(FALSE)
    # The greedy ".*" strips everything up to the LAST arrow, leaving the
    # final return type of the operator.
    ret <- sub(".*(\u2192|->)\\s*", "", type_str)
    trimws(ret) == "V"
  }, logical(1))

  correction <- ifelse(is_verb, 1 / sentence_prob, 1)

  # --- Dictionary: usable "trans." entries, prepared once for all signs ---
  # which() drops rows whose row_type is NA instead of yielding all-NA rows.
  trans <- dic[which(dic$row_type == "trans."), , drop = FALSE]
  entry_type <- trimws(as.character(trans$type))
  # match() returns the first matching type, mirroring lookup by name.
  entry_idx <- match(entry_type, all_types)
  usable <- which(
    !is.na(entry_type) & entry_type != "" & !is.na(entry_idx)
  )
  entry_sign <- trans$sign_name[usable]
  entry_idx <- entry_idx[usable]
  entry_count <- if (is.null(trans$count)) {
    rep(1, length(usable))
  } else {
    trans$count[usable]
  }
  entry_count[is.na(entry_count)] <- 1

  # --- Unique positions from sign_grammar output ---
  positions <- sort(unique(sg$position))
  # sign_name and cuneiform per position (first occurrence wins)
  pos_info <- sg[
    !duplicated(sg$position),
    c("position", "sign_name", "cuneiform")
  ]

  # Nothing to report: return a typed empty frame rather than NULL.
  if (length(positions) == 0L || K == 0L) {
    return(data.frame(
      position   = positions[0L],
      sign_name  = pos_info$sign_name[0L],
      cuneiform  = pos_info$cuneiform[0L],
      type       = character(0),
      prob       = numeric(0),
      n          = integer(0),
      stringsAsFactors = FALSE
    ))
  }

  # --- Build result: one K-row block per position ---
  blocks <- vector("list", length(positions))
  b <- 0L

  for (pos in positions) {
    b <- b + 1L
    # match() selects exactly one row; a logical `==` index would also pull
    # in an all-NA row whenever sg contains a missing position.
    info <- pos_info[match(pos, pos_info$position), ]
    s <- info$sign_name

    # Raw counts n[k] from dictionary
    n_k <- setNames(numeric(K), all_types)
    hit <- which(entry_sign == s)
    if (length(hit) > 0L) {
      totals <- rowsum(entry_count[hit], entry_idx[hit])
      n_k[as.integer(rownames(totals))] <- totals[, 1L]
    }

    # Corrected counts m[k] = n[k] * x[k]
    m_k <- n_k * correction
    M <- sum(m_k)

    # Posterior: theta[k] = (alpha0 * p[k] + m[k]) / (alpha0 + M).
    # With alpha0 == 0 and no evidence this is 0/0; use the prior, which is
    # the limit of the formula as alpha0 -> 0.
    denom <- alpha0 + M
    theta <- if (isTRUE(denom == 0)) {
      prior
    } else {
      (alpha0 * prior + m_k) / denom
    }

    blocks[[b]] <- data.frame(
      position   = pos,
      sign_name  = s,
      cuneiform  = info$cuneiform,
      type       = all_types,
      prob       = as.numeric(theta),
      n          = as.integer(n_k),
      stringsAsFactors = FALSE,
      row.names  = NULL
    )
  }

  out <- do.call(rbind, blocks)
  rownames(out) <- NULL
  out
}
