-- lib/tongue/transliteration.lua
--
-- Lua I18N library 'Tongue' - Transliteration of strings
--
-- Copyright 2016 Daniel Silverstone <dsilvers@digital-scurf.org>
--
-- For licence terms, see COPYING
--

--- Tongue language packs are internally always in UTF-8, but users may need
-- different encodings.
--
-- Since users might have all sorts of ways of specifying the desired character
-- encoding for their messages, Tongue provides a mechanism for deriving the
-- target character encoding and then transliterating to and from that
-- encoding.
--
-- @module tongue.transliteration

local iconv = require "iconv"
local util = require "tongue.util"

local converter = {}

--- Tongue character-set converter
--
-- Internally Tongue works purely in UTF-8, yet callers may supply input and
-- expect output in whatever character set they choose.  A converter object
-- wraps a pair of iconv descriptors, one per direction, to bridge the two.
--
-- @type converter

--- Convert a string to the user character set.
--
-- When the underlying descriptor yields no string at all, a string of the
-- form "Error <code>" is returned in its place.
--
-- @tparam string input The input (UTF-8) string
-- @treturn string The output (user charset) string
-- @function touser
function converter:touser(input)
   local converted, err = self._touser:iconv(input)
   if converted then
      return converted
   end
   return "Error " .. tostring(err)
end

--- Convert a string from the user character set.
--
-- When the underlying descriptor yields no string at all, a string of the
-- form "Error <code>" is returned in its place.
--
-- @tparam string input The input (user charset) string
-- @treturn string The output (UTF-8) string
-- @function fromuser
function converter:fromuser(input)
   local converted, err = self._fromuser:iconv(input)
   if converted then
      return converted
   end
   return "Error " .. tostring(err)
end

-- Metatable shared by every converter instance; method lookups fall through
-- to the converter table above.
local converter_mt = { __index = converter }

---
-- @section tongue.transliteration

--- Retrieve a tongue encoding converter.
--
-- Construct and return an encoder which can convert between the provided
-- encoding and UTF-8 in either direction.  The converter will be configured to
-- transliterate where possible and to replace bad or unknown codepoints so as
-- to ensure that the outputs are always valid.
--
-- If the desired encoding is UTF-8 then the encoder returned shall effectively
-- be a passthrough, excepting that invalid or malformed codepoints shall be
-- "cleaned up" by the encoder object.
--
-- @tparam string encoding The desired encoding to be used
-- @treturn converter The bidirectional character converter
-- @function get
local function get_converter(encoding)
   -- One descriptor per direction.  The //TRANSLIT//IGNORE suffix on the
   -- target charset requests transliteration where possible and asks iconv
   -- to skip whatever still cannot be represented.
   local touser = iconv.open(encoding .. "//TRANSLIT//IGNORE", "UTF-8")
   local fromuser = iconv.open("UTF-8//TRANSLIT//IGNORE", encoding)

   -- iconv.open yields nil when the conversion is not supported.  Report
   -- that here, naming the offending encoding, rather than handing back a
   -- converter whose methods would later fail while indexing a nil
   -- descriptor.
   if not (touser and fromuser) then
      error("tongue.transliteration: unable to open iconv descriptors for " ..
               "encoding '" .. tostring(encoding) .. "'", 2)
   end

   return setmetatable({ _touser = touser, _fromuser = fromuser },
                       converter_mt)
end

--- Retrieve a tongue encoding converter based on the environment.
--
-- Firstly this function attempts to determine the encoding desired by the
-- "client" by means of examining the provided environment table (or the
-- process environment table if none was given).  Once an encoding has been
-- determined somehow, tongue will return an encoder by calling through to
-- the @{get} function.
--
-- If no encoding can be determined from the provided table, tongue will assume
-- that UTF-8 is appropriate.
--
-- @tparam ?table env The environment to use (or nil to use the process env)
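-- @treturn converter The bidirectional character converter
-- @treturn ?string The value of the environment variable from which the
--   encoding was derived (absent when no variable was set and UTF-8 was
--   assumed)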
-- @function guess
local function guess_converter(env)
   -- Read a variable from the supplied table when there is one, otherwise
   -- from the process environment.
   local function lookup(name)
      if env then
         return env[name]
      end
      return os.getenv(name)
   end

   -- Consulted strictly in this order, stopping at the first variable that
   -- is set and non-empty.  This mirrors glibc's precedence of LC_ALL, then
   -- LC_MESSAGES, then LANG; since only the encoding is of interest here,
   -- treating LANG as the last resort is sufficient.
   local candidates = { "LC_ALL", "LC_MESSAGES", "LANG" }
   for i = 1, #candidates do
      local category = lookup(candidates[i])
      if category and category ~= "" then
         -- Only the third value returned by split_category is used as the
         -- encoding name; a missing or empty one means UTF-8.
         local _, _, charset = util.split_category(category)
         if not charset or charset == "" then
            charset = "UTF-8"
         end
         -- The raw variable value is handed back as a second result.
         return get_converter(charset), category
      end
   end

   -- Nothing usable was found: assume UTF-8 (no second result here).
   return get_converter("UTF-8")
end

-- Public interface of the module: only the two constructors are exported.
local exports = {}
exports.get = get_converter
exports.guess = guess_converter
return exports
   
