#' @title Bayesian Generalized Linear Models for Sector Analysis
#' @name bayesian_glm
#' @description Functions for fitting Bayesian GLMs sector by sector.
NULL

#' Fit Bayesian GLM for Each Sector
#'
#' Fits a separate Bayesian generalized linear model for each sector,
#' regressing production prices on direct prices.
#'
#' @param direct_prices Data frame with direct prices. Must contain a 'Year'
#'   column (conventionally the first one); every other column is treated as
#'   a sector.
#' @param production_prices Data frame with prices of production. Must contain
#'   a column for every sector found in \code{direct_prices}. Observations are
#'   paired with those of \code{direct_prices} by row position.
#' @param chains Number of MCMC chains. Default 4.
#' @param iter Number of iterations per chain. Default 4000.
#' @param seed Random seed for reproducibility. Default 12345. The same seed
#'   is passed to every sector fit.
#' @param verbose Logical. Print progress messages. Default TRUE.
#'
#' @return A list with four elements:
#' \describe{
#'   \item{results}{Named list with one entry per sector. A sector whose fit
#'     failed is represented by a list with elements \code{sector} and
#'     \code{error} (the error message)}
#'   \item{summary_table}{Data frame with summary statistics for the sectors
#'     fitted successfully, or \code{NULL} if none succeeded}
#'   \item{n_successful}{Number of sectors fitted without error}
#'   \item{n_errors}{Number of sectors whose fit raised an error}
#' }
#'
#' @details
#' This function requires the 'rstanarm' and 'loo' packages to be installed.
#' Each sector model uses a Gaussian family with identity link and
#' weakly informative priors. An error while fitting one sector does not
#' abort the run: it is recorded in \code{results} and the remaining sectors
#' are still fitted.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
#'   set.seed(123)
#'   years <- 2000:2010
#'
#'   direct <- data.frame(
#'     Year = years,
#'     Agriculture = 100 + cumsum(rnorm(11, 2, 1)),
#'     Manufacturing = 120 + cumsum(rnorm(11, 2, 1))
#'   )
#'
#'   production <- data.frame(
#'     Year = years,
#'     Agriculture = 102 + cumsum(rnorm(11, 2, 1)),
#'     Manufacturing = 118 + cumsum(rnorm(11, 2, 1))
#'   )
#'
#'   results <- fit_bayesian_glm_sectors(
#'     direct, production,
#'     chains = 2, iter = 1000
#'   )
#'   print(results$summary_table)
#' }
#' }
#'
#' @export
fit_bayesian_glm_sectors <- function(direct_prices,
                                      production_prices,
                                      chains = 4L,
                                      iter = 4000L,
                                      seed = 12345L,
                                      verbose = TRUE) {

    check_package("rstanarm", "Bayesian GLM fitting")
    check_package("loo", "model comparison via LOO-CV")

    if (!("Year" %in% names(direct_prices))) {
        stop("direct_prices must have a 'Year' column.")
    }

    # Drop 'Year' by name, not by position: the check above only guarantees
    # that the column exists, so dropping the first column could discard a
    # real sector and leave 'Year' to be modelled as one.
    column_names <- names(direct_prices)
    sector_names <- column_names[column_names != "Year"]

    missing_sectors <- setdiff(sector_names, names(production_prices))
    if (length(missing_sectors) > 0L) {
        stop(sprintf(
            "Sectors missing in production_prices: %s",
            paste(missing_sectors, collapse = ", ")
        ))
    }

    n_sectors <- length(sector_names)

    results_list <- vector("list", n_sectors)
    names(results_list) <- sector_names

    if (verbose) {
        message(sprintf("Fitting Bayesian GLM for %d sectors...", n_sectors))
    }

    for (i in seq_along(sector_names)) {

        sector <- sector_names[i]

        if (verbose) {
            message(sprintf("  [%d/%d] %s", i, n_sectors, sector))
        }

        # A failing sector is recorded as list(sector, error) so that the
        # remaining sectors are still fitted.
        results_list[[i]] <- tryCatch(
            fit_single_sector_glm(
                x_values = direct_prices[[sector]],
                y_values = production_prices[[sector]],
                sector_name = sector,
                chains = chains,
                iter = iter,
                seed = seed
            ),
            error = function(e) {
                list(
                    sector = sector,
                    error = conditionMessage(e)
                )
            }
        )
    }

    # vapply() always yields a logical vector; sapply() would return an empty
    # list when there are no sectors, and negating that list is an error.
    has_error <- vapply(
        results_list,
        function(x) "error" %in% names(x),
        logical(1)
    )
    n_success <- sum(!has_error)
    n_error <- sum(has_error)

    if (verbose) {
        message(sprintf(
            "Completed: %d successful, %d with errors.",
            n_success, n_error
        ))
    }

    summary_table <- NULL
    if (n_success > 0L) {
        summary_table <- create_sector_summary_table(results_list[!has_error])
    }

    list(
        results = results_list,
        summary_table = summary_table,
        n_successful = n_success,
        n_errors = n_error
    )
}


#' Fit Single Sector Bayesian GLM (Internal)
#'
#' Internal function to fit a Bayesian GLM (\code{y ~ x}, Gaussian family,
#' identity link) for one sector and collect fit diagnostics.
#'
#' @param x_values Vector of direct price values. Coerced to numeric; factors
#'   are converted through their labels, not their level codes.
#' @param y_values Vector of production price values, same length as
#'   \code{x_values} and coerced the same way.
#' @param sector_name Character string with sector name.
#' @param chains Number of MCMC chains.
#' @param iter Number of iterations.
#' @param seed Random seed.
#'
#' @return A list with the sector name, the fitted \code{intercept} and
#'   \code{slope}, R2 summaries (\code{R2_mean}, \code{R2_median},
#'   \code{R2_mode}), \code{MAE} and \code{RMSE} plus their values relative
#'   to summary statistics of y (the \code{*_pct} elements), the LOO
#'   estimates (\code{ELPD_LOO}, \code{p_LOO}, \code{LOOIC}), summary
#'   statistics of y (\code{Y_*}), \code{n_obs}, the fitted \code{model},
#'   its \code{predictions}, and the cleaned \code{x_data} and \code{y_data}.
#'
#' @details
#' Observation pairs in which either value is missing, non-numeric or
#' non-finite are dropped before fitting. An error is raised if the two
#' inputs differ in length or if fewer than 3 usable pairs remain.
#'
#' @keywords internal
fit_single_sector_glm <- function(x_values, y_values, sector_name,
                                   chains, iter, seed) {

    # as.numeric() on a factor returns its integer level codes, so factors
    # are converted to character first. Values that cannot be parsed become
    # NA (the coercion warning is intentionally silenced) and are dropped
    # below.
    coerce_numeric <- function(values) {
        if (is.factor(values)) {
            values <- as.character(values)
        }
        suppressWarnings(as.numeric(values))
    }

    x_values <- coerce_numeric(x_values)
    y_values <- coerce_numeric(y_values)

    # Checked explicitly because `&` below would recycle the shorter vector.
    if (length(x_values) != length(y_values)) {
        stop("x_values and y_values must have the same length.")
    }

    # is.finite() rejects NA, NaN and +/-Inf; infinite values would otherwise
    # make the prior location and scale computed below non-finite.
    valid_idx <- is.finite(x_values) & is.finite(y_values)

    if (sum(valid_idx) < 3L) {
        stop("Insufficient valid observations (need at least 3).")
    }

    x_clean <- x_values[valid_idx]
    y_clean <- y_values[valid_idx]

    model_df <- data.frame(x = x_clean, y = y_clean)

    # The intercept prior is located at the observed mean of y with scale
    # sd(y). A constant y gives sd 0, so a tiny positive scale is substituted
    # (presumably because a zero prior scale is not usable -- confirm against
    # rstanarm).
    y_mean <- mean(y_clean)
    y_sd <- stats::sd(y_clean)
    if (!is.finite(y_sd) || y_sd == 0) {
        y_sd <- 1e-6
    }

    bglm_model <- rstanarm::stan_glm(
        y ~ x,
        data = model_df,
        family = stats::gaussian(link = "identity"),
        prior_intercept = rstanarm::normal(y_mean, y_sd),
        algorithm = "sampling",
        chains = chains,
        iter = iter,
        seed = seed,
        refresh = 0
    )

    # With the formula y ~ x the coefficients are (intercept, slope).
    coefficients <- stats::coef(bglm_model)
    intercept <- unname(coefficients[1L])
    slope <- unname(coefficients[2L])

    loo_result <- loo::loo(bglm_model)

    r2_values <- get_r2_values(bglm_model)

    predictions <- stats::fitted(bglm_model)

    mae_value <- Metrics::mae(y_clean, predictions)
    rmse_value <- Metrics::rmse(y_clean, predictions)

    y_stats <- list(
        mean = y_mean,
        median = stats::median(y_clean),
        min = min(y_clean),
        max = max(y_clean),
        range = diff(range(y_clean))
    )

    # Error metrics expressed relative to summary statistics of y.
    mae_rel <- list(
        range = safe_pct(mae_value, y_stats$range),
        mean = safe_pct(mae_value, y_stats$mean),
        median = safe_pct(mae_value, y_stats$median)
    )

    rmse_rel <- list(
        range = safe_pct(rmse_value, y_stats$range),
        mean = safe_pct(rmse_value, y_stats$mean),
        median = safe_pct(rmse_value, y_stats$median),
        max = safe_pct(rmse_value, y_stats$max),
        min = safe_pct(rmse_value, y_stats$min)
    )

    list(
        sector = sector_name,
        intercept = intercept,
        slope = slope,
        R2_mean = unname(r2_values["mean"]),
        R2_median = unname(r2_values["median"]),
        R2_mode = unname(r2_values["mode"]),
        MAE = mae_value,
        RMSE = rmse_value,
        MAE_rel_range_pct = mae_rel$range,
        MAE_rel_mean_pct = mae_rel$mean,
        MAE_rel_median_pct = mae_rel$median,
        RMSE_rel_range_pct = rmse_rel$range,
        RMSE_rel_mean_pct = rmse_rel$mean,
        RMSE_rel_median_pct = rmse_rel$median,
        RMSE_rel_max_pct = rmse_rel$max,
        RMSE_rel_min_pct = rmse_rel$min,
        ELPD_LOO = loo_result$estimates["elpd_loo", "Estimate"],
        p_LOO = loo_result$estimates["p_loo", "Estimate"],
        LOOIC = loo_result$estimates["looic", "Estimate"],
        Y_mean = y_stats$mean,
        Y_median = y_stats$median,
        Y_range = y_stats$range,
        Y_min = y_stats$min,
        Y_max = y_stats$max,
        n_obs = length(y_clean),
        model = bglm_model,
        predictions = predictions,
        x_data = x_clean,
        y_data = y_clean
    )
}


#' Create Summary Table from Sector Results
#'
#' Internal function that turns a list of successful sector results into a
#' data frame with one row per sector and rounded summary statistics.
#'
#' @param results_list List of successful sector results.
#'
#' @return Data frame with summary statistics, or \code{NULL} when
#'   \code{results_list} is empty.
#'
#' @keywords internal
create_sector_summary_table <- function(results_list) {

    # Table column -> name of the result element it is read from, listed in
    # the order in which the columns appear in the table.
    field_of <- c(
        Intercept = "intercept",
        Slope = "slope",
        R2_mean = "R2_mean",
        R2_median = "R2_median",
        R2_mode = "R2_mode",
        MAE = "MAE",
        MAE_rel_range = "MAE_rel_range_pct",
        MAE_rel_mean = "MAE_rel_mean_pct",
        RMSE = "RMSE",
        RMSE_rel_range = "RMSE_rel_range_pct",
        RMSE_rel_mean = "RMSE_rel_mean_pct",
        ELPD_LOO = "ELPD_LOO",
        p_LOO = "p_LOO",
        LOOIC = "LOOIC",
        Y_mean = "Y_mean",
        Y_range = "Y_range"
    )

    # The relative-error and LOO columns are rounded to 2 decimals, every
    # other numeric column to 4.
    two_decimal_cols <- c(
        "MAE_rel_range", "MAE_rel_mean",
        "RMSE_rel_range", "RMSE_rel_mean",
        "ELPD_LOO", "p_LOO", "LOOIC"
    )
    digits_of <- ifelse(names(field_of) %in% two_decimal_cols, 2, 4)

    build_row <- function(res) {
        # exact = FALSE gives `[[` the same name matching as `$`.
        metric_cols <- Map(
            function(field, digits) round(res[[field, exact = FALSE]], digits),
            field_of,
            digits_of
        )
        row_args <- c(
            list(Sector = res$sector),
            metric_cols,
            list(N_obs = res$n_obs, stringsAsFactors = FALSE)
        )
        do.call(data.frame, row_args)
    }

    do.call(rbind, lapply(results_list, build_row))
}


#' Extract Sector Coefficients
#'
#' Extracts intercepts and slopes from all sector models that were fitted
#' successfully. Entries carrying an \code{error} element are skipped.
#'
#' @param sector_results List of sector results from fit_bayesian_glm_sectors
#'   (its \code{results} element).
#'
#' @return Data frame with columns \code{sector}, \code{intercept} and
#'   \code{slope}. If there are no valid results (including an empty input
#'   list), a warning is issued and a zero-row data frame with the same
#'   columns is returned.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
#'   set.seed(123)
#'   years <- 2000:2010
#'   direct <- data.frame(
#'     Year = years,
#'     A = 100 + cumsum(rnorm(11)),
#'     B = 120 + cumsum(rnorm(11))
#'   )
#'   production <- data.frame(
#'     Year = years,
#'     A = 102 + cumsum(rnorm(11)),
#'     B = 118 + cumsum(rnorm(11))
#'   )
#'   results <- fit_bayesian_glm_sectors(direct, production,
#'                                        chains = 2, iter = 1000)
#'   coefs <- extract_sector_coefficients(results$results)
#'   print(coefs)
#' }
#' }
#'
#' @export
extract_sector_coefficients <- function(sector_results) {

    # vapply() always yields a logical vector; sapply() would return an empty
    # list for empty input, and negating that list is an error.
    has_error <- vapply(
        sector_results,
        function(x) "error" %in% names(x),
        logical(1)
    )
    valid <- sector_results[!has_error]

    if (length(valid) == 0L) {
        warning("No valid sector results to extract.")
        return(data.frame(
            sector = character(0),
            intercept = numeric(0),
            slope = numeric(0),
            stringsAsFactors = FALSE
        ))
    }

    # Typed extraction: a result lacking one of these scalars fails here
    # instead of producing a list column.
    data.frame(
        sector = vapply(valid, function(x) x$sector, character(1)),
        intercept = vapply(valid, function(x) x$intercept, numeric(1)),
        slope = vapply(valid, function(x) x$slope, numeric(1)),
        stringsAsFactors = FALSE
    )
}
