#include "icu-collate.hpp"

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2011-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  appendable.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010dec07
*   created by: Markus W. Scherer
*/


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2011-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  appendable.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010dec07
*   created by: Markus W. Scherer
*/


/**
 * \file
 * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts).
 */



#if U_SHOW_CPLUSPLUS_API



U_NAMESPACE_BEGIN

class UnicodeString;

/**
 * Base class for objects to which Unicode characters and strings can be appended.
 * Combines elements of Java Appendable and ICU4C ByteSink.
 *
 * This class can be used in APIs where it does not matter whether the actual destination is
 * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object
 * that receives and processes characters and/or strings.
 *
 * Implementation classes must implement at least appendCodeUnit(char16_t).
 * The base class provides default implementations for the other methods.
 *
 * The methods do not take UErrorCode parameters.
 * If an error occurs (e.g., out-of-memory),
 * in addition to returning FALSE from failing operations,
 * the implementation must prevent unexpected behavior (e.g., crashes)
 * from further calls and should make the error condition available separately
 * (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
 * @stable ICU 4.8
 */
class U_COMMON_API Appendable : public UObject {
public:
    /**
     * Destructor.
     * @stable ICU 4.8
     */
    ~Appendable();

    /**
     * Appends a 16-bit code unit.
     * @param c code unit
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendCodeUnit(char16_t c) = 0;

    /**
     * Appends a code point.
     * The default implementation calls appendCodeUnit(char16_t) once or twice.
     * @param c code point 0..0x10ffff
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendCodePoint(UChar32 c);

    /**
     * Appends a string.
     * The default implementation calls appendCodeUnit(char16_t) for each code unit.
     * @param s string, must not be NULL if length!=0
     * @param length string length, or -1 if NUL-terminated
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendString(const char16_t *s, int32_t length);

    /**
     * Tells the object that the caller is going to append roughly
     * appendCapacity char16_ts. A subclass might use this to pre-allocate
     * a larger buffer if necessary.
     * The default implementation does nothing. (It always returns TRUE.)
     * @param appendCapacity estimated number of char16_ts that will be appended
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool reserveAppendCapacity(int32_t appendCapacity);

    /**
     * Returns a writable buffer for appending and writes the buffer's capacity to
     * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
     * May return a pointer to the caller-owned scratch buffer which must have
     * scratchCapacity>=minCapacity.
     * The returned buffer is only valid until the next operation
     * on this Appendable.
     *
     * After writing at most *resultCapacity char16_ts, call appendString() with the
     * pointer returned from this function and the number of char16_ts written.
     * Many appendString() implementations will avoid copying char16_ts if this function
     * returned an internal buffer.
     *
     * Partial usage example:
     * \code
     *  int32_t capacity;
     *  char16_t* buffer = app.getAppendBuffer(..., &capacity);
     *  ... Write n char16_ts into buffer, with n <= capacity.
     *  app.appendString(buffer, n);
     * \endcode
     * In many implementations, that call to append will avoid copying char16_ts.
     *
     * If the Appendable allocates or reallocates an internal buffer, it should use
     * the desiredCapacityHint if appropriate.
     * If a caller cannot provide a reasonable guess at the desired capacity,
     * it should pass desiredCapacityHint=0.
     *
     * If a non-scratch buffer is returned, the caller may only pass
     * a prefix to it to appendString().
     * That is, it is not correct to pass an interior pointer to appendString().
     *
     * The default implementation always returns the scratch buffer.
     *
     * @param minCapacity required minimum capacity of the returned buffer;
     *                    must be non-negative
     * @param desiredCapacityHint desired capacity of the returned buffer;
     *                            must be non-negative
     * @param scratch default caller-owned buffer
     * @param scratchCapacity capacity of the scratch buffer
     * @param resultCapacity pointer to an integer which will be set to the
     *                       capacity of the returned buffer
     * @return a buffer with *resultCapacity>=minCapacity
     * @stable ICU 4.8
     */
    virtual char16_t *getAppendBuffer(int32_t minCapacity,
                                   int32_t desiredCapacityHint,
                                   char16_t *scratch, int32_t scratchCapacity,
                                   int32_t *resultCapacity);
};

/**
 * An Appendable implementation which writes to a UnicodeString.
 *
 * This class is not intended for public subclassing.
 * @stable ICU 4.8
 */
class U_COMMON_API UnicodeStringAppendable : public Appendable {
public:
    /**
     * Aliases the UnicodeString (keeps its reference) for writing.
     * @param s The UnicodeString to which this Appendable will write.
     * @stable ICU 4.8
     */
    explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {}

    /**
     * Destructor.
     * @stable ICU 4.8
     */
    ~UnicodeStringAppendable();

    /**
     * Appends a 16-bit code unit to the string.
     * @param c code unit
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendCodeUnit(char16_t c);

    /**
     * Appends a code point to the string.
     * @param c code point 0..0x10ffff
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendCodePoint(UChar32 c);

    /**
     * Appends a string to the UnicodeString.
     * @param s string, must not be NULL if length!=0
     * @param length string length, or -1 if NUL-terminated
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool appendString(const char16_t *s, int32_t length);

    /**
     * Tells the UnicodeString that the caller is going to append roughly
     * appendCapacity char16_ts.
     * @param appendCapacity estimated number of char16_ts that will be appended
     * @return TRUE if the operation succeeded
     * @stable ICU 4.8
     */
    virtual UBool reserveAppendCapacity(int32_t appendCapacity);

    /**
     * Returns a writable buffer for appending and writes the buffer's capacity to
     * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
     * May return a pointer to the caller-owned scratch buffer which must have
     * scratchCapacity>=minCapacity.
     * The returned buffer is only valid until the next write operation
     * on the UnicodeString.
     *
     * For details see Appendable::getAppendBuffer().
     *
     * @param minCapacity required minimum capacity of the returned buffer;
     *                    must be non-negative
     * @param desiredCapacityHint desired capacity of the returned buffer;
     *                            must be non-negative
     * @param scratch default caller-owned buffer
     * @param scratchCapacity capacity of the scratch buffer
     * @param resultCapacity pointer to an integer which will be set to the
     *                       capacity of the returned buffer
     * @return a buffer with *resultCapacity>=minCapacity
     * @stable ICU 4.8
     */
    virtual char16_t *getAppendBuffer(int32_t minCapacity,
                                   int32_t desiredCapacityHint,
                                   char16_t *scratch, int32_t scratchCapacity,
                                   int32_t *resultCapacity);

private:
    UnicodeString &str;
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */




U_NAMESPACE_BEGIN

Appendable::~Appendable() {}

UBool
Appendable::appendCodePoint(UChar32 c) {
    if(c<=0xffff) {
        return appendCodeUnit((UChar)c);
    } else {
        return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
    }
}

UBool
Appendable::appendString(const UChar *s, int32_t length) {
    if(length<0) {
        UChar c;
        while((c=*s++)!=0) {
            if(!appendCodeUnit(c)) {
                return FALSE;
            }
        }
    } else if(length>0) {
        const UChar *limit=s+length;
        do {
            if(!appendCodeUnit(*s++)) {
                return FALSE;
            }
        } while(s<limit);
    }
    return TRUE;
}

UBool
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
    return TRUE;
}

UChar *
Appendable::getAppendBuffer(int32_t minCapacity,
                            int32_t /*desiredCapacityHint*/,
                            UChar *scratch, int32_t scratchCapacity,
                            int32_t *resultCapacity) {
    if(minCapacity<1 || scratchCapacity<minCapacity) {
        *resultCapacity=0;
        return NULL;
    }
    *resultCapacity=scratchCapacity;
    return scratch;
}

// UnicodeStringAppendable is implemented in unistr.cpp.

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2007-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  bmpset.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2007jan29
*   created by: Markus W. Scherer
*/





// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File CMEMORY.H
*
*  Contains stdlib.h/string.h memory functions
*
* @author       Bertrand A. Damiba
*
* Modification History:
*
*   Date        Name        Description
*   6/20/98     Bertrand    Created.
*  05/03/99     stephen     Changed from functions to macros.
*
******************************************************************************
*/




#include <stddef.h>
#include <string.h>


#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif


#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)

/**
 * \def UPRV_LENGTHOF
 * Convenience macro to determine the length of a fixed array at compile-time.
 * @param array A fixed length array
 * @return The length of the array, in elements
 * @internal
 */
#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num)

U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);

U_CAPI void * U_EXPORT2
uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);

U_CAPI void U_EXPORT2
uprv_free(void *mem);

U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);

/**
 * Get the least significant bits of a pointer (a memory address).
 * For example, with a mask of 3, the macro gets the 2 least significant bits,
 * which will be 0 if the pointer is 32-bit (4-byte) aligned.
 *
 * uintptr_t is the most appropriate integer type to cast to.
 */
#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask))

/**
 * Create & return an instance of "type" in statically allocated storage.
 * e.g.
 *    static std::mutex *myMutex = STATIC_NEW(std::mutex);
 * To destroy an object created in this way, invoke the destructor explicitly, e.g.
 *    myMutex->~mutex();
 * DO NOT use delete.
 * DO NOT use with class UMutex, which has specific support for static instances.
 *
 * STATIC_NEW is intended for use when
 *   - We want a static (or global) object.
 *   - We don't want it to ever be destructed, or to explicitly control destruction,
 *     to avoid use-after-destruction problems.
 *   - We want to avoid an ordinary heap allocated object,
 *     to avoid the possibility of memory allocation failures, and
 *     to avoid memory leak reports, from valgrind, for example.
 * This is defined as a macro rather than a template function because each invocation
 * must define distinct static storage for the object being returned.
 */
#define STATIC_NEW(type) [] () { \
    alignas(type) static char storage[sizeof(type)]; \
    return new(storage) type();} ()

/**
  *  Heap clean up function, called from u_cleanup()
  *    Clears any user heap functions from u_setMemoryFunctions()
  *    Does NOT deallocate any remaining allocated memory.
  */
U_CFUNC UBool 
cmemory_cleanup(void);

/**
 * A function called by <TT>uhash_remove</TT>,
 * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
 * an existing key or value.
 * @param obj A key or value stored in a hashtable
 * @see uprv_deleteUObject
 */
typedef void U_CALLCONV UObjectDeleter(void* obj);

/**
 * Deleter for UObject instances.
 * Works for all subclasses of UObject because it has a virtual destructor.
 */
U_CAPI void U_EXPORT2
uprv_deleteUObject(void *obj);

#ifdef __cplusplus

#include <utility>


U_NAMESPACE_BEGIN

/**
 * "Smart pointer" class, deletes memory via uprv_free().
 * For most methods see the LocalPointerBase base class.
 * Adds operator[] for array item access.
 *
 * @see LocalPointerBase
 */
template<typename T>
class LocalMemory : public LocalPointerBase<T> {
public:
    using LocalPointerBase<T>::operator*;
    using LocalPointerBase<T>::operator->;
    /**
     * Constructor takes ownership.
     * @param p simple pointer to an array of T items that is adopted
     */
    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
    /**
     * Move constructor, leaves src with isNull().
     * @param src source smart pointer
     */
    LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
        src.ptr=NULL;
    }
    /**
     * Destructor deletes the memory it owns.
     */
    ~LocalMemory() {
        uprv_free(LocalPointerBase<T>::ptr);
    }
    /**
     * Move assignment operator, leaves src with isNull().
     * The behavior is undefined if *this and src are the same object.
     * @param src source smart pointer
     * @return *this
     */
    LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
        uprv_free(LocalPointerBase<T>::ptr);
        LocalPointerBase<T>::ptr=src.ptr;
        src.ptr=NULL;
        return *this;
    }
    /**
     * Swap pointers.
     * @param other other smart pointer
     */
    void swap(LocalMemory<T> &other) U_NOEXCEPT {
        T *temp=LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=other.ptr;
        other.ptr=temp;
    }
    /**
     * Non-member LocalMemory swap function.
     * @param p1 will get p2's pointer
     * @param p2 will get p1's pointer
     */
    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
        p1.swap(p2);
    }
    /**
     * Deletes the array it owns,
     * and adopts (takes ownership of) the one passed in.
     * @param p simple pointer to an array of T items that is adopted
     */
    void adoptInstead(T *p) {
        uprv_free(LocalPointerBase<T>::ptr);
        LocalPointerBase<T>::ptr=p;
    }
    /**
     * Deletes the array it owns, allocates a new one and reset its bytes to 0.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
     * this method returns NULL.
     * @param newCapacity must be >0
     * @return the allocated array pointer, or NULL if the allocation failed
     */
    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
    /**
     * Deletes the array it owns and allocates a new one, copying length T items.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
     * this method returns NULL.
     * @param newCapacity must be >0
     * @param length number of T items to be copied from the old array to the new one;
     *               must be no more than the capacity of the old array,
     *               which the caller must track because the LocalMemory does not track it
     * @return the allocated array pointer, or NULL if the allocation failed
     */
    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
    /**
     * Array item access (writable).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     */
    T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
};

template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
    if(newCapacity>0) {
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
        if(p!=NULL) {
            uprv_memset(p, 0, newCapacity*sizeof(T));
            uprv_free(LocalPointerBase<T>::ptr);
            LocalPointerBase<T>::ptr=p;
        }
        return p;
    } else {
        return NULL;
    }
}


template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
    if(newCapacity>0) {
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
        if(p!=NULL) {
            if(length>0) {
                if(length>newCapacity) {
                    length=newCapacity;
                }
                uprv_memcpy(p, LocalPointerBase<T>::ptr, (size_t)length*sizeof(T));
            }
            uprv_free(LocalPointerBase<T>::ptr);
            LocalPointerBase<T>::ptr=p;
        }
        return p;
    } else {
        return NULL;
    }
}

/**
 * Simple array/buffer management class using uprv_malloc() and uprv_free().
 * Provides an internal array with fixed capacity. Can alias another array
 * or allocate one.
 *
 * The array address is properly aligned for type T. It might not be properly
 * aligned for types larger than T (or larger than the largest subtype of T).
 *
 * Unlike LocalMemory and LocalArray, this class never adopts
 * (takes ownership of) another array.
 *
 * WARNING: MaybeStackArray only works with primitive (plain-old data) types.
 * It does NOT know how to call a destructor! If you work with classes with
 * destructors, consider LocalArray in localpointer.h or MemoryPool.
 */
template<typename T, int32_t stackCapacity>
class MaybeStackArray {
public:
    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
#endif

    /**
     * Default constructor initializes with internal T[stackCapacity] buffer.
     */
    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
    /**
     * Automatically allocates the heap array if the argument is larger than the stack capacity.
     * Intended for use when an approximate capacity is known at compile time but the true
     * capacity is not known until runtime.
     */
    MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
        if (capacity < newCapacity) { resize(newCapacity); }
    }
    /**
     * Destructor deletes the array (if owned).
     */
    ~MaybeStackArray() { releaseArray(); }
    /**
     * Move constructor: transfers ownership or copies the stack array.
     */
    MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
    /**
     * Move assignment: transfers ownership or copies the stack array.
     */
    MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
    /**
     * Returns the array capacity (number of T items).
     * @return array capacity
     */
    int32_t getCapacity() const { return capacity; }
    /**
     * Access without ownership change.
     * @return the array pointer
     */
    T *getAlias() const { return ptr; }
    /**
     * Returns the array limit. Simple convenience method.
     * @return getAlias()+getCapacity()
     */
    T *getArrayLimit() const { return getAlias()+capacity; }
    // No "operator T *() const" because that can make
    // expressions like mbs[index] ambiguous for some compilers.
    /**
     * Array item access (const).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     */
    const T &operator[](ptrdiff_t i) const { return ptr[i]; }
    /**
     * Array item access (writable).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     */
    T &operator[](ptrdiff_t i) { return ptr[i]; }
    /**
     * Deletes the array (if owned) and aliases another one, no transfer of ownership.
     * If the arguments are illegal, then the current array is unchanged.
     * @param otherArray must not be NULL
     * @param otherCapacity must be >0
     */
    void aliasInstead(T *otherArray, int32_t otherCapacity) {
        if(otherArray!=NULL && otherCapacity>0) {
            releaseArray();
            ptr=otherArray;
            capacity=otherCapacity;
            needToRelease=FALSE;
        }
    }
    /**
     * Deletes the array (if owned) and allocates a new one, copying length T items.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
     * this method returns NULL.
     * @param newCapacity can be less than or greater than the current capacity;
     *                    must be >0
     * @param length number of T items to be copied from the old array to the new one
     * @return the allocated array pointer, or NULL if the allocation failed
     */
    inline T *resize(int32_t newCapacity, int32_t length=0);
    /**
     * Gives up ownership of the array if owned, or else clones it,
     * copying length T items; resets itself to the internal stack array.
     * Returns NULL if the allocation failed.
     * @param length number of T items to copy when cloning,
     *        and capacity of the clone when cloning
     * @param resultCapacity will be set to the returned array's capacity (output-only)
     * @return the array pointer;
     *         caller becomes responsible for deleting the array
     */
    inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
    T *ptr;
    int32_t capacity;
    UBool needToRelease;
    T stackArray[stackCapacity];
    void releaseArray() {
        if(needToRelease) {
            uprv_free(ptr);
        }
    }
    void resetToStackArray() {
        ptr=stackArray;
        capacity=stackCapacity;
        needToRelease=FALSE;
    }
    /* No comparison operators with other MaybeStackArray's. */
    bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
    bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
    /* No ownership transfer: No copy constructor, no assignment operator. */
    MaybeStackArray(const MaybeStackArray & /*other*/) {}
    void operator=(const MaybeStackArray & /*other*/) {}
};

template<typename T, int32_t stackCapacity>
icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
    if (src.ptr == src.stackArray) {
        ptr = stackArray;
        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
    } else {
        src.resetToStackArray();  // take ownership away from src
    }
}

template<typename T, int32_t stackCapacity>
inline MaybeStackArray <T, stackCapacity>&
MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
    releaseArray();  // in case this instance had its own memory allocated
    capacity = src.capacity;
    needToRelease = src.needToRelease;
    if (src.ptr == src.stackArray) {
        ptr = stackArray;
        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
    } else {
        ptr = src.ptr;
        src.resetToStackArray();  // take ownership away from src
    }
    return *this;
}

template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
    if(newCapacity>0) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
      ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
#endif
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
        if(p!=NULL) {
            if(length>0) {
                if(length>capacity) {
                    length=capacity;
                }
                if(length>newCapacity) {
                    length=newCapacity;
                }
                uprv_memcpy(p, ptr, (size_t)length*sizeof(T));
            }
            releaseArray();
            ptr=p;
            capacity=newCapacity;
            needToRelease=TRUE;
        }
        return p;
    } else {
        return NULL;
    }
}

template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
    T *p;
    if(needToRelease) {
        p=ptr;
    } else if(length<=0) {
        return NULL;
    } else {
        if(length>capacity) {
            length=capacity;
        }
        p=(T *)uprv_malloc(length*sizeof(T));
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
      ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
#endif
        if(p==NULL) {
            return NULL;
        }
        uprv_memcpy(p, ptr, (size_t)length*sizeof(T));
    }
    resultCapacity=length;
    resetToStackArray();
    return p;
}

/**
 * Variant of MaybeStackArray that allocates a header struct and an array
 * in one contiguous memory block, using uprv_malloc() and uprv_free().
 * Provides internal memory with fixed array capacity. Can alias another memory
 * block or allocate one.
 * The stackCapacity is the number of T items in the internal memory,
 * not counting the H header.
 * Unlike LocalMemory and LocalArray, this class never adopts
 * (takes ownership of) another memory block.
 */
template<typename H, typename T, int32_t stackCapacity>
class MaybeStackHeaderAndArray {
public:
    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
#endif

    /**
     * Default constructor initializes with internal H+T[stackCapacity] buffer.
     */
    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
    /**
     * Destructor deletes the memory (if owned).
     */
    ~MaybeStackHeaderAndArray() { releaseMemory(); }
    /**
     * Returns the array capacity (number of T items).
     * @return array capacity
     */
    int32_t getCapacity() const { return capacity; }
    /**
     * Access without ownership change.
     * @return the header pointer
     */
    H *getAlias() const { return ptr; }
    /**
     * Returns the array start.
     * @return array start, same address as getAlias()+1
     */
    T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
    /**
     * Returns the array limit.
     * @return array limit
     */
    T *getArrayLimit() const { return getArrayStart()+capacity; }
    /**
     * Access without ownership change. Same as getAlias().
     * A class instance can be used directly in expressions that take a T *.
     * @return the header pointer
     */
    operator H *() const { return ptr; }
    /**
     * Array item access (writable).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     */
    T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
    /**
     * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
     * If the arguments are illegal, then the current memory is unchanged.
     * @param otherArray must not be NULL
     * @param otherCapacity must be >0
     */
    void aliasInstead(H *otherMemory, int32_t otherCapacity) {
        if(otherMemory!=NULL && otherCapacity>0) {
            releaseMemory();
            ptr=otherMemory;
            capacity=otherCapacity;
            needToRelease=FALSE;
        }
    }
    /**
     * Deletes the memory block (if owned) and allocates a new one,
     * copying the header and length T array items.
     * Returns the new header pointer.
     * If the allocation fails, then the current memory is unchanged and
     * this method returns NULL.
     * @param newCapacity can be less than or greater than the current capacity;
     *                    must be >0
     * @param length number of T items to be copied from the old array to the new one
     * @return the allocated pointer, or NULL if the allocation failed
     */
    inline H *resize(int32_t newCapacity, int32_t length=0);
    /**
     * Gives up ownership of the memory if owned, or else clones it,
     * copying the header and length T array items; resets itself to the internal memory.
     * Returns NULL if the allocation failed.
     * @param length number of T items to copy when cloning,
     *        and array capacity of the clone when cloning
     * @param resultCapacity will be set to the returned array's capacity (output-only)
     * @return the header pointer;
     *         caller becomes responsible for deleting the array
     */
    inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
    H *ptr;
    int32_t capacity;
    UBool needToRelease;
    // stackHeader must precede stackArray immediately.
    H stackHeader;
    T stackArray[stackCapacity];
    void releaseMemory() {
        if(needToRelease) {
            uprv_free(ptr);
        }
    }
    /* No comparison operators with other MaybeStackHeaderAndArray's. */
    bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
    /* No ownership transfer: No copy constructor, no assignment operator. */
    MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
    void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
};

template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
                                                                int32_t length) {
    if(newCapacity>=0) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
      ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
#endif
        H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
        if(p!=NULL) {
            if(length<0) {
                length=0;
            } else if(length>0) {
                if(length>capacity) {
                    length=capacity;
                }
                if(length>newCapacity) {
                    length=newCapacity;
                }
            }
            uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
            releaseMemory();
            ptr=p;
            capacity=newCapacity;
            needToRelease=TRUE;
        }
        return p;
    } else {
        return NULL;
    }
}

template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
                                                                       int32_t &resultCapacity) {
    H *p;
    if(needToRelease) {
        p=ptr;
    } else {
        if(length<0) {
            length=0;
        } else if(length>capacity) {
            length=capacity;
        }
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
      ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
#endif
        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
        if(p==NULL) {
            return NULL;
        }
        uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
    }
    resultCapacity=length;
    ptr=&stackHeader;
    capacity=stackCapacity;
    needToRelease=FALSE;
    return p;
}

/**
 * A simple memory management class that creates new heap allocated objects (of
 * any class that has a public constructor), keeps track of them and eventually
 * deletes them all in its own destructor.
 *
 * A typical use-case would be code like this:
 *
 *     MemoryPool<MyType> pool;
 *
 *     MyType* o1 = pool.create();
 *     if (o1 != nullptr) {
 *         foo(o1);
 *     }
 *
 *     MyType* o2 = pool.create(1, 2, 3);
 *     if (o2 != nullptr) {
 *         bar(o2);
 *     }
 *
 *     // MemoryPool will take care of deleting the MyType objects.
 *
 * It doesn't do anything more than that, and is intentionally kept minimalist.
 */
template<typename T, int32_t stackCapacity = 8>
class MemoryPool : public UMemory {
public:
    MemoryPool() : count(0), pool() {}

    ~MemoryPool() {
        for (int32_t i = 0; i < count; ++i) {
            delete pool[i];
        }
    }

    MemoryPool(const MemoryPool&) = delete;
    MemoryPool& operator=(const MemoryPool&) = delete;

    MemoryPool(MemoryPool&& other) U_NOEXCEPT : count(other.count),
                                                pool(std::move(other.pool)) {
        other.count = 0;
    }

    MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT {
        count = other.count;
        pool = std::move(other.pool);
        other.count = 0;
        return *this;
    }

    /**
     * Creates a new object of typename T, by forwarding any and all arguments
     * to the typename T constructor.
     *
     * @param args Arguments to be forwarded to the typename T constructor.
     * @return A pointer to the newly created object, or nullptr on error.
     */
    template<typename... Args>
    T* create(Args&&... args) {
        int32_t capacity = pool.getCapacity();
        if (count == capacity &&
            pool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity,
                        capacity) == nullptr) {
            return nullptr;
        }
        return pool[count++] = new T(std::forward<Args>(args)...);
    }

private:
    int32_t count;
    MaybeStackArray<T*, stackCapacity> pool;
};

U_NAMESPACE_END

#endif  /* __cplusplus */

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  bmpset.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2007jan29
*   created by: Markus W. Scherer
*/





U_NAMESPACE_BEGIN

/*
 * Helper class for frozen UnicodeSets, implements contains() and span()
 * optimized for BMP code points. Structured to be UTF-8-friendly.
 *
 * Latin-1: Look up bytes.
 * 2-byte characters: Bits organized vertically.
 * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
 *                    with mixed for illegal ranges.
 * Supplementary characters: Binary search over
 * the supplementary part of the parent set's inversion list.
 */
class BMPSet : public UMemory {
public:
    BMPSet(const int32_t *parentList, int32_t parentListLength);
    BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
    virtual ~BMPSet();

    virtual UBool contains(UChar32 c) const;

    /*
     * Span the initial substring for which each character c has spanCondition==contains(c).
     * It must be s<limit and spanCondition==0 or 1.
     * @return The string pointer which limits the span.
     */
    const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;

    /*
     * Span the trailing substring for which each character c has spanCondition==contains(c).
     * It must be s<limit and spanCondition==0 or 1.
     * @return The string pointer which starts the span.
     */
    const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;

    /*
     * Span the initial substring for which each character c has spanCondition==contains(c).
     * It must be length>0 and spanCondition==0 or 1.
     * @return The string pointer which limits the span.
     */
    const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;

    /*
     * Span the trailing substring for which each character c has spanCondition==contains(c).
     * It must be length>0 and spanCondition==0 or 1.
     * @return The start of the span.
     */
    int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;

private:
    void initBits();
    void overrideIllegal();

    /**
     * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
     * binary search is restricted for finding code points in a certain range.
     *
     * For restricting the search for finding in the range start..end,
     * pass in
     *   lo=findCodePoint(start) and
     *   hi=findCodePoint(end)
     * with 0<=lo<=hi<len.
     * findCodePoint(c) defaults to lo=0 and hi=len-1.
     *
     * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
     * @param lo The lowest index to be returned.
     * @param hi The highest index to be returned.
     * @return the smallest integer i in the range lo..hi,
     *         inclusive, such that c < list[i]
     */
    int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;

    inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;

    /*
     * One byte 0 or 1 per Latin-1 character.
     */
    UBool latin1Contains[0x100];

    /* TRUE if contains(U+FFFD). */
    UBool containsFFFD;

    /*
     * One bit per code point from U+0000..U+07FF.
     * The bits are organized vertically; consecutive code points
     * correspond to the same bit positions in consecutive table words.
     * With code point parts
     *   lead=c{10..6}
     *   trail=c{5..0}
     * it is set.contains(c)==(table7FF[trail] bit lead)
     *
     * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
     * for faster validity checking at runtime.
     */
    uint32_t table7FF[64];

    /*
     * One bit per 64 BMP code points.
     * The bits are organized vertically; consecutive 64-code point blocks
     * correspond to the same bit position in consecutive table words.
     * With code point parts
     *   lead=c{15..12}
     *   t1=c{11..6}
     * test bits (lead+16) and lead in bmpBlockBits[t1].
     * If the upper bit is 0, then the lower bit indicates if contains(c)
     * for all code points in the 64-block.
     * If the upper bit is 1, then the block is mixed and set.contains(c)
     * must be called.
     *
     * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
     * the result of contains(FFFD) for faster validity checking at runtime.
     */
    uint32_t bmpBlockBits[64];

    /*
     * Inversion list indexes for restricted binary searches in
     * findCodePoint(), from
     * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
     * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
     * always looked up in the bit tables.
     * The last pair of indexes is for finding supplementary code points.
     */
    int32_t list4kStarts[18];

    /*
     * The inversion list of the parent set, for the slower contains() implementation
     * for mixed BMP blocks and for supplementary code points.
     * The list is terminated with list[listLength-1]=0x110000.
     */
    const int32_t *list;
    int32_t listLength;
};

inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
    return (UBool)(findCodePoint(c, lo, hi) & 1);
}

U_NAMESPACE_END


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2002-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File uassert.h
*
*  Contains the U_ASSERT and UPRV_UNREACHABLE macros
*
******************************************************************************
*/

/* utypes.h is included to get the proper define for uint8_t */

/* for abort */
#include <stdlib.h>

/**
 * \def U_ASSERT
 * By default, U_ASSERT just wraps the C library assert macro.
 * By changing the definition here, the assert behavior for ICU can be changed
 * without affecting other non - ICU uses of the C library assert().
*/
#if U_DEBUG
#   include <assert.h>
#   define U_ASSERT(exp) assert(exp)
#else
#   define U_ASSERT(exp)
#endif

/**
 * \def UPRV_UNREACHABLE
 * This macro is used to unconditionally abort if unreachable code is ever executed.
 * @internal
*/
#if defined(UPRV_UNREACHABLE)
    // Use the predefined value.
#else
#   define UPRV_UNREACHABLE abort()
#endif



U_NAMESPACE_BEGIN

BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));

    /*
     * Set the list indexes for binary searches for
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
     * looked up in the bit tables.
     * The last pair of indexes is for finding supplementary code points.
     */
    list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
    int32_t i;
    for(i=1; i<=0x10; ++i) {
        list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
    }
    list4kStarts[0x11]=listLength-1;
    containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);

    initBits();
    overrideIllegal();
}

BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        containsFFFD(otherBMPSet.containsFFFD),
        list(newParentList), listLength(newParentListLength) {
    uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
}

BMPSet::~BMPSet() {
}

/*
 * Set bits in a bit rectangle in "vertical" bit organization.
 * start<limit<=0x800
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    U_ASSERT(start<limit);
    U_ASSERT(limit<=0x800);

    int32_t lead=start>>6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.

    // Set one bit indicating an all-one block.
    uint32_t bits=(uint32_t)1<<lead;
    if((start+1)==limit) {  // Single-character shortcut.
        table[trail]|=bits;
        return;
    }

    int32_t limitLead=limit>>6;
    int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
    } else {
        // Partial vertical bit column,
        // followed by a bit rectangle,
        // followed by another partial vertical bit column.
        if(trail>0) {
            do {
                table[trail++]|=bits;
            } while(trail<64);
            ++lead;
        }
        if(lead<limitLead) {
            bits=~(((unsigned)1<<lead)-1);
            if(limitLead<0x20) {
                bits&=((unsigned)1<<limitLead)-1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
            }
        }
        // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
        // In that case, bits=1<<limitLead is undefined but the bits value
        // is not used because trail<limitTrail is already false.
        bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
        for(trail=0; trail<limitTrail; ++trail) {
            table[trail]|=bits;
        }
    }
}

void BMPSet::initBits() {
    UChar32 start, limit;
    int32_t listIndex=0;

    // Set latin1Contains[].
    do {
        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
        if(start>=0x100) {
            break;
        }
        do {
            latin1Contains[start++]=1;
        } while(start<limit && start<0x100);
    } while(limit<=0x100);

    // Find the first range overlapping with (or after) 80..FF again,
    // to include them in table7FF as well.
    for(listIndex=0;;) {
        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
        if(limit>0x80) {
            if(start<0x80) {
                start=0x80;
            }
            break;
        }
    }

    // Set table7FF[].
    while(start<0x800) {
        set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
        if(limit>0x800) {
            start=0x800;
            break;
        }

        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
    }

    // Set bmpBlockBits[].
    int32_t minStart=0x800;
    while(start<0x10000) {
        if(limit>0x10000) {
            limit=0x10000;
        }

        if(start<minStart) {
            start=minStart;
        }
        if(start<limit) {  // Else: Another range entirely in a known mixed-value block.
            if(start&0x3f) {
                // Mixed-value block of 64 code points.
                start>>=6;
                bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
                start=(start+1)<<6;  // Round up to the next block boundary.
                minStart=start;      // Ignore further ranges in this block.
            }
            if(start<limit) {
                if(start<(limit&~0x3f)) {
                    // Multiple all-ones blocks of 64 code points each.
                    set32x64Bits(bmpBlockBits, start>>6, limit>>6);
                }

                if(limit&0x3f) {
                    // Mixed-value block of 64 code points.
                    limit>>=6;
                    bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
                    limit=(limit+1)<<6;  // Round up to the next block boundary.
                    minStart=limit;      // Ignore further ranges in this block.
                }
            }
        }

        if(limit==0x10000) {
            break;
        }

        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
    }
}

/*
 * Override some bits and bytes to the result of contains(FFFD)
 * for faster validity checking at runtime.
 * No need to set 0 values where they were reset to 0 in the constructor
 * and not modified by initBits().
 * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
 * Need to set 0 values for surrogates D800..DFFF.
 */
void BMPSet::overrideIllegal() {
    uint32_t bits, mask;
    int32_t i;

    if(containsFFFD) {
        bits=3;                 // Lead bytes 0xC0 and 0xC1.
        for(i=0; i<64; ++i) {
            table7FF[i]|=bits;
        }

        bits=1;                 // Lead byte 0xE0.
        for(i=0; i<32; ++i) {   // First half of 4k block.
            bmpBlockBits[i]|=bits;
        }

        mask= static_cast<uint32_t>(~(0x10001<<0xd));   // Lead byte 0xED.
        bits=1<<0xd;
        for(i=32; i<64; ++i) {  // Second half of 4k block.
            bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
        }
    } else {
        mask= static_cast<uint32_t>(~(0x10001<<0xd));   // Lead byte 0xED.
        for(i=32; i<64; ++i) {  // Second half of 4k block.
            bmpBlockBits[i]&=mask;
        }
    }
}

int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    /* Examples:
                                       findCodePoint(c)
       set              list[]         c=0 1 3 4 7 8
       ===              ==============   ===========
       []               [110000]         0 0 0 0 0 0
       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
       [:Any:]          [0, 110000]      1 1 1 1 1 1
     */

    // Return the smallest i such that c < list[i].  Assume
    // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
    if (c < list[lo])
        return lo;
    // High runner test.  c is often after the last range, so an
    // initial check for this condition pays off.
    if (lo >= hi || c >= list[hi-1])
        return hi;
    // invariant: c >= list[lo]
    // invariant: c < list[hi]
    for (;;) {
        int32_t i = (lo + hi) >> 1;
        if (i == lo) {
            break; // Found!
        } else if (c < list[i]) {
            hi = i;
        } else {
            lo = i;
        }
    }
    return hi;
}

UBool
BMPSet::contains(UChar32 c) const {
    if((uint32_t)c<=0xff) {
        return (UBool)latin1Contains[c];
    } else if((uint32_t)c<=0x7ff) {
        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
    } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
        int lead=c>>12;
        uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        if(twoBits<=1) {
            // All 64 code points with the same bits 15..6
            // are either in the set or not.
            return (UBool)twoBits;
        } else {
            // Look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
        }
    } else if((uint32_t)c<=0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    } else {
        // Out-of-range code points get FALSE, consistent with long-standing
        // behavior of UnicodeSet::contains(c).
        return FALSE;
    }
}

/*
 * Check for sufficient length for trail unit for each surrogate pair.
 * Handle single surrogates as surrogate code points as usual in ICU.
 */
const UChar *
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
    UChar c, c2;

    if(spanCondition) {
        // span
        do {
            c=*s;
            if(c<=0xff) {
                if(!latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
                int lead=c>>12;
                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
                if(twoBits<=1) {
                    // All 64 code points with the same bits 15..6
                    // are either in the set or not.
                    if(twoBits==0) {
                        break;
                    }
                } else {
                    // Look up the code point in its 4k block of code points.
                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
                        break;
                    }
                }
            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
                // surrogate code point
                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
                    break;
                }
            } else {
                // surrogate pair
                if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
                    break;
                }
                ++s;
            }
        } while(++s<limit);
    } else {
        // span not
        do {
            c=*s;
            if(c<=0xff) {
                if(latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
                int lead=c>>12;
                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
                if(twoBits<=1) {
                    // All 64 code points with the same bits 15..6
                    // are either in the set or not.
                    if(twoBits!=0) {
                        break;
                    }
                } else {
                    // Look up the code point in its 4k block of code points.
                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
                        break;
                    }
                }
            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
                // surrogate code point
                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
                    break;
                }
            } else {
                // surrogate pair
                if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
                    break;
                }
                ++s;
            }
        } while(++s<limit);
    }
    return s;
}

/* Symmetrical with span(). */
const UChar *
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
    UChar c, c2;

    if(spanCondition) {
        // span
        for(;;) {
            c=*(--limit);
            if(c<=0xff) {
                if(!latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
                int lead=c>>12;
                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
                if(twoBits<=1) {
                    // All 64 code points with the same bits 15..6
                    // are either in the set or not.
                    if(twoBits==0) {
                        break;
                    }
                } else {
                    // Look up the code point in its 4k block of code points.
                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
                        break;
                    }
                }
            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
                // surrogate code point
                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
                    break;
                }
            } else {
                // surrogate pair
                if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
                    break;
                }
                --limit;
            }
            if(s==limit) {
                return s;
            }
        }
    } else {
        // span not
        for(;;) {
            c=*(--limit);
            if(c<=0xff) {
                if(latin1Contains[c]) {
                    break;
                }
            } else if(c<=0x7ff) {
                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
                int lead=c>>12;
                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
                if(twoBits<=1) {
                    // All 64 code points with the same bits 15..6
                    // are either in the set or not.
                    if(twoBits!=0) {
                        break;
                    }
                } else {
                    // Look up the code point in its 4k block of code points.
                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
                        break;
                    }
                }
            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
                // surrogate code point
                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
                    break;
                }
            } else {
                // surrogate pair
                if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
                    break;
                }
                --limit;
            }
            if(s==limit) {
                return s;
            }
        }
    }
    return limit+1;
}

/*
 * Precheck for sufficient trail bytes at end of string only once per span.
 * Check validity.
 */
const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
    const uint8_t *limit=s+length;
    uint8_t b=*s;
    if(U8_IS_SINGLE(b)) {
        // Initial all-ASCII span.
        if(spanCondition) {
            do {
                if(!latin1Contains[b] || ++s==limit) {
                    return s;
                }
                b=*s;
            } while(U8_IS_SINGLE(b));
        } else {
            do {
                if(latin1Contains[b] || ++s==limit) {
                    return s;
                }
                b=*s;
            } while(U8_IS_SINGLE(b));
        }
        length=(int32_t)(limit-s);
    }

    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
    }

    const uint8_t *limit0=limit;

    /*
     * Make sure that the last 1/2/3/4-byte sequence before limit is complete
     * or runs into a lead byte.
     * In the span loop compare s with limit only once
     * per multi-byte character.
     *
     * Give a trailing illegal sequence the same value as the result of contains(FFFD),
     * including it if that is part of the span, otherwise set limit0 to before
     * the truncated sequence.
     */
    b=*(limit-1);
    if((int8_t)b<0) {
        // b>=0x80: lead or trail byte
        if(b<0xc0) {
            // single trail byte, check for preceding 3- or 4-byte lead byte
            if(length>=2 && (b=*(limit-2))>=0xe0) {
                limit-=2;
                if(containsFFFD!=spanCondition) {
                    limit0=limit;
                }
            } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
                // 4-byte lead byte with only two trail bytes
                limit-=3;
                if(containsFFFD!=spanCondition) {
                    limit0=limit;
                }
            }
        } else {
            // lead byte with no trail bytes
            --limit;
            if(containsFFFD!=spanCondition) {
                limit0=limit;
            }
        }
    }

    uint8_t t1, t2, t3;

    while(s<limit) {
        b=*s;
        if(U8_IS_SINGLE(b)) {
            // ASCII
            if(spanCondition) {
                do {
                    if(!latin1Contains[b]) {
                        return s;
                    } else if(++s==limit) {
                        return limit0;
                    }
                    b=*s;
                } while(U8_IS_SINGLE(b));
            } else {
                do {
                    if(latin1Contains[b]) {
                        return s;
                    } else if(++s==limit) {
                        return limit0;
                    }
                    b=*s;
                } while(U8_IS_SINGLE(b));
            }
        }
        ++s;  // Advance past the lead byte.
        if(b>=0xe0) {
            if(b<0xf0) {
                if( /* handle U+0000..U+FFFF inline */
                    (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
                    (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
                ) {
                    b&=0xf;
                    uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
                    if(twoBits<=1) {
                        // All 64 code points with this lead byte and middle trail byte
                        // are either in the set or not.
                        if(twoBits!=(uint32_t)spanCondition) {
                            return s-1;
                        }
                    } else {
                        // Look up the code point in its 4k block of code points.
                        UChar32 c=(b<<12)|(t1<<6)|t2;
                        if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
                            return s-1;
                        }
                    }
                    s+=2;
                    continue;
                }
            } else if( /* handle U+10000..U+10FFFF inline */
                (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
                (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
                (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
            ) {
                // Give an illegal sequence the same value as the result of contains(FFFD).
                UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
                if( (   (0x10000<=c && c<=0x10ffff) ?
                            containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
                            containsFFFD
                    ) != spanCondition
                ) {
                    return s-1;
                }
                s+=3;
                continue;
            }
        } else {
            if( /* handle U+0000..U+07FF inline */
                b>=0xc0 &&
                (t1=(uint8_t)(*s-0x80)) <= 0x3f
            ) {
                if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
                    return s-1;
                }
                ++s;
                continue;
            }
        }

        // Give an illegal sequence the same value as the result of contains(FFFD).
        // Handle each byte of an illegal sequence separately to simplify the code;
        // no need to optimize error handling.
        if(containsFFFD!=spanCondition) {
            return s-1;
        }
    }

    return limit0;
}

/*
 * While going backwards through UTF-8 optimize only for ASCII.
 * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
 * possible to tell from the last byte in a multi-byte sequence how many
 * preceding bytes there should be. Therefore, going backwards through UTF-8
 * is much harder than going forward.
 */
int32_t
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
    }

    uint8_t b;

    do {
        b=s[--length];
        if(U8_IS_SINGLE(b)) {
            // ASCII sub-span
            if(spanCondition) {
                do {
                    if(!latin1Contains[b]) {
                        return length+1;
                    } else if(length==0) {
                        return 0;
                    }
                    b=s[--length];
                } while(U8_IS_SINGLE(b));
            } else {
                do {
                    if(latin1Contains[b]) {
                        return length+1;
                    } else if(length==0) {
                        return 0;
                    }
                    b=s[--length];
                } while(U8_IS_SINGLE(b));
            }
        }

        int32_t prev=length;
        UChar32 c;
        // trail byte: collect a multi-byte character
        // (or  lead byte in last-trail position)
        c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
        // c is a valid code point, not ASCII, not a surrogate
        if(c<=0x7ff) {
            if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
                return prev+1;
            }
        } else if(c<=0xffff) {
            int lead=c>>12;
            uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
            if(twoBits<=1) {
                // All 64 code points with the same bits 15..6
                // are either in the set or not.
                if(twoBits!=(uint32_t)spanCondition) {
                    return prev+1;
                }
            } else {
                // Look up the code point in its 4k block of code points.
                if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
                    return prev+1;
                }
            }
        } else {
            if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
                return prev+1;
            }
        }
    } while(length>0);
    return 0;
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 ************************************************************************************
 * Copyright (C) 2006-2016, International Business Machines Corporation
 * and others. All Rights Reserved.
 ************************************************************************************
 */



// #if !UCONFIG_NO_BREAK_ITERATION

// #include "unicode/uchar.h"
// #include "unicode/uniset.h"
// #include "unicode/chariter.h"
// #include "unicode/ures.h"
// #include "unicode/udata.h"
// #include "unicode/putil.h"
// #include "unicode/ustring.h"
// #include "unicode/uscript.h"
// #include "unicode/ucharstrie.h"
// #include "unicode/bytestrie.h"

// #include "brkeng.h"
// #include "cmemory.h"
// #include "dictbe.h"
// #include "charstr.h"
// #include "dictionarydata.h"
// #include "mutex.h"
// #include "uvector.h"
// #include "umutex.h"
// #include "uresimp.h"
// #include "ubrkimpl.h"

// U_NAMESPACE_BEGIN

// /*
//  ******************************************************************
//  */

// LanguageBreakEngine::LanguageBreakEngine() {
// }

// LanguageBreakEngine::~LanguageBreakEngine() {
// }

// /*
//  ******************************************************************
//  */

// LanguageBreakFactory::LanguageBreakFactory() {
// }

// LanguageBreakFactory::~LanguageBreakFactory() {
// }

// /*
//  ******************************************************************
//  */

// UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
//     (void)status;
// }

// UnhandledEngine::~UnhandledEngine() {
//     delete fHandled;
//     fHandled = nullptr;
// }

// UBool
// UnhandledEngine::handles(UChar32 c) const {
//     return fHandled && fHandled->contains(c);
// }

// int32_t
// UnhandledEngine::findBreaks( UText *text,
//                              int32_t /* startPos */,
//                              int32_t endPos,
//                              UVector32 &/*foundBreaks*/ ) const {
//     UChar32 c = utext_current32(text);
//     while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
//         utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
//         c = utext_current32(text);
//     }
//     return 0;
// }

// void
// UnhandledEngine::handleCharacter(UChar32 c) {
//     if (fHandled == nullptr) {
//         fHandled = new UnicodeSet();
//         if (fHandled == nullptr) {
//             return;
//         }
//     }
//     if (!fHandled->contains(c)) {
//         UErrorCode status = U_ZERO_ERROR;
//         // Apply the entire script of the character.
//         int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
//         fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
//     }
// }

// /*
//  ******************************************************************
//  */

// ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
//     fEngines = 0;
// }

// ICULanguageBreakFactory::~ICULanguageBreakFactory() {
//     if (fEngines != 0) {
//         delete fEngines;
//     }
// }

// U_NAMESPACE_END
// U_CDECL_BEGIN
// static void U_CALLCONV _deleteEngine(void *obj) {
//     delete (const icu::LanguageBreakEngine *) obj;
// }
// U_CDECL_END
// U_NAMESPACE_BEGIN

// const LanguageBreakEngine *
// ICULanguageBreakFactory::getEngineFor(UChar32 c) {
//     const LanguageBreakEngine *lbe = NULL;
//     UErrorCode  status = U_ZERO_ERROR;

//     static UMutex gBreakEngineMutex;
//     Mutex m(&gBreakEngineMutex);

//     if (fEngines == NULL) {
//         UStack  *engines = new UStack(_deleteEngine, NULL, status);
//         if (U_FAILURE(status) || engines == NULL) {
//             // Note: no way to return error code to caller.
//             delete engines;
//             return NULL;
//         }
//         fEngines = engines;
//     } else {
//         int32_t i = fEngines->size();
//         while (--i >= 0) {
//             lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
//             if (lbe != NULL && lbe->handles(c)) {
//                 return lbe;
//             }
//         }
//     }

//     // We didn't find an engine. Create one.
//     lbe = loadEngineFor(c);
//     if (lbe != NULL) {
//         fEngines->push((void *)lbe, status);
//     }
//     return lbe;
// }

// const LanguageBreakEngine *
// ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
//     UErrorCode status = U_ZERO_ERROR;
//     UScriptCode code = uscript_getScript(c, &status);
//     if (U_SUCCESS(status)) {
//         DictionaryMatcher *m = loadDictionaryMatcherFor(code);
//         if (m != NULL) {
//             const LanguageBreakEngine *engine = NULL;
//             switch(code) {
//             case USCRIPT_THAI:
//                 engine = new ThaiBreakEngine(m, status);
//                 break;
//             case USCRIPT_LAO:
//                 engine = new LaoBreakEngine(m, status);
//                 break;
//             case USCRIPT_MYANMAR:
//                 engine = new BurmeseBreakEngine(m, status);
//                 break;
//             case USCRIPT_KHMER:
//                 engine = new KhmerBreakEngine(m, status);
//                 break;

// #if !UCONFIG_NO_NORMALIZATION
//                 // CJK not available w/o normalization
//             case USCRIPT_HANGUL:
//                 engine = new CjkBreakEngine(m, kKorean, status);
//                 break;

//             // use same BreakEngine and dictionary for both Chinese and Japanese
//             case USCRIPT_HIRAGANA:
//             case USCRIPT_KATAKANA:
//             case USCRIPT_HAN:
//                 engine = new CjkBreakEngine(m, kChineseJapanese, status);
//                 break;
// #if 0
//             // TODO: Have to get some characters with script=common handled
//             // by CjkBreakEngine (e.g. U+309B). Simply subjecting
//             // them to CjkBreakEngine does not work. The engine has to
//             // special-case them.
//             case USCRIPT_COMMON:
//             {
//                 UBlockCode block = ublock_getCode(code);
//                 if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
//                    engine = new CjkBreakEngine(dict, kChineseJapanese, status);
//                 break;
//             }
// #endif
// #endif

//             default:
//                 break;
//             }
//             if (engine == NULL) {
//                 delete m;
//             }
//             else if (U_FAILURE(status)) {
//                 delete engine;
//                 engine = NULL;
//             }
//             return engine;
//         }
//     }
//     return NULL;
// }

// DictionaryMatcher *
// ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
//     UErrorCode status = U_ZERO_ERROR;
//     // open root from brkitr tree.
//     UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
//     b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
//     int32_t dictnlength = 0;
//     const UChar *dictfname =
//         ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
//     if (U_FAILURE(status)) {
//         ures_close(b);
//         return NULL;
//     }
//     CharString dictnbuf;
//     CharString ext;
//     const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
//     if (extStart != NULL) {
//         int32_t len = (int32_t)(extStart - dictfname);
//         ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
//         dictnlength = len;
//     }
//     dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
//     ures_close(b);

//     UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
//     if (U_SUCCESS(status)) {
//         // build trie
//         const uint8_t *data = (const uint8_t *)udata_getMemory(file);
//         const int32_t *indexes = (const int32_t *)data;
//         const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
//         const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
//         DictionaryMatcher *m = NULL;
//         if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
//             const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
//             const char *characters = (const char *)(data + offset);
//             m = new BytesDictionaryMatcher(characters, transform, file);
//         }
//         else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
//             const UChar *characters = (const UChar *)(data + offset);
//             m = new UCharsDictionaryMatcher(characters, file);
//         }
//         if (m == NULL) {
//             // no matcher exists to take ownership - either we are an invalid
//             // type or memory allocation failed
//             udata_close(file);
//         }
//         return m;
//     } else if (dictfname != NULL) {
//         // we don't have a dictionary matcher.
//         // returning NULL here will cause us to fail to find a dictionary break engine, as expected
//         status = U_ZERO_ERROR;
//         return NULL;
//     }
//     return NULL;
// }

// U_NAMESPACE_END

// #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File brkiter.cpp
*
* Modification History:
*
*   Date        Name        Description
*   02/18/97    aliu        Converted from OpenClass.  Added DONE.
*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
*****************************************************************************************
*/

// *****************************************************************************
// This file was generated from the java source file BreakIterator.java
// *****************************************************************************



// #if !UCONFIG_NO_BREAK_ITERATION

// #include "unicode/rbbi.h"
// #include "unicode/brkiter.h"
// #include "unicode/udata.h"
// #include "unicode/ures.h"
// #include "unicode/ustring.h"
// #include "unicode/filteredbrk.h"
// #include "ucln_cmn.h"
// #include "cstring.h"
// #include "umutex.h"
// #include "servloc.h"
// #include "locbased.h"
// #include "uresimp.h"
// #include "uassert.h"
// #include "ubrkimpl.h"
// #include "charstr.h"

// // *****************************************************************************
// // class BreakIterator
// // This class implements methods for finding the location of boundaries in text.
// // Instances of BreakIterator maintain a current position and scan over text
// // returning the index of characters where boundaries occur.
// // *****************************************************************************

// U_NAMESPACE_BEGIN

// // -------------------------------------

// BreakIterator*
// BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
// {
//     char fnbuff[256];
//     char ext[4]={'\0'};
//     CharString actualLocale;
//     int32_t size;
//     const UChar* brkfname = NULL;
//     UResourceBundle brkRulesStack;
//     UResourceBundle brkNameStack;
//     UResourceBundle *brkRules = &brkRulesStack;
//     UResourceBundle *brkName  = &brkNameStack;
//     RuleBasedBreakIterator *result = NULL;

//     if (U_FAILURE(status))
//         return NULL;

//     ures_initStackObject(brkRules);
//     ures_initStackObject(brkName);

//     // Get the locale
//     UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);

//     // Get the "boundaries" array.
//     if (U_SUCCESS(status)) {
//         brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
//         // Get the string object naming the rules file
//         brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
//         // Get the actual string
//         brkfname = ures_getString(brkName, &size, &status);
//         U_ASSERT((size_t)size<sizeof(fnbuff));
//         if ((size_t)size>=sizeof(fnbuff)) {
//             size=0;
//             if (U_SUCCESS(status)) {
//                 status = U_BUFFER_OVERFLOW_ERROR;
//             }
//         }

//         // Use the string if we found it
//         if (U_SUCCESS(status) && brkfname) {
//             actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);

//             UChar* extStart=u_strchr(brkfname, 0x002e);
//             int len = 0;
//             if(extStart!=NULL){
//                 len = (int)(extStart-brkfname);
//                 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
//                 u_UCharsToChars(brkfname, fnbuff, len);
//             }
//             fnbuff[len]=0; // nul terminate
//         }
//     }

//     ures_close(brkRules);
//     ures_close(brkName);

//     UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
//     if (U_FAILURE(status)) {
//         ures_close(b);
//         return NULL;
//     }

//     // Create a RuleBasedBreakIterator
//     result = new RuleBasedBreakIterator(file, status);

//     // If there is a result, set the valid locale and actual locale, and the kind
//     if (U_SUCCESS(status) && result != NULL) {
//         U_LOCALE_BASED(locBased, *(BreakIterator*)result);
//         locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
//                               actualLocale.data());
//     }

//     ures_close(b);

//     if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
//         delete result;
//         return NULL;
//     }

//     if (result == NULL) {
//         udata_close(file);
//         if (U_SUCCESS(status)) {
//             status = U_MEMORY_ALLOCATION_ERROR;
//         }
//     }

//     return result;
// }

// // Creates a break iterator for word breaks.
// BreakIterator* U_EXPORT2
// BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
// {
//     return createInstance(key, UBRK_WORD, status);
// }

// // -------------------------------------

// // Creates a break iterator  for line breaks.
// BreakIterator* U_EXPORT2
// BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
// {
//     return createInstance(key, UBRK_LINE, status);
// }

// // -------------------------------------

// // Creates a break iterator  for character breaks.
// BreakIterator* U_EXPORT2
// BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
// {
//     return createInstance(key, UBRK_CHARACTER, status);
// }

// // -------------------------------------

// // Creates a break iterator  for sentence breaks.
// BreakIterator* U_EXPORT2
// BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
// {
//     return createInstance(key, UBRK_SENTENCE, status);
// }

// // -------------------------------------

// // Creates a break iterator for title casing breaks.
// BreakIterator* U_EXPORT2
// BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
// {
//     return createInstance(key, UBRK_TITLE, status);
// }

// // -------------------------------------

// // Gets all the available locales that has localized text boundary data.
// const Locale* U_EXPORT2
// BreakIterator::getAvailableLocales(int32_t& count)
// {
//     return Locale::getAvailableLocales(count);
// }

// // ------------------------------------------
// //
// // Constructors, destructor and assignment operator
// //
// //-------------------------------------------

// BreakIterator::BreakIterator()
// {
//     *validLocale = *actualLocale = 0;
// }

// BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
//     uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
//     uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
// }

// BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
//     if (this != &other) {
//         uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
//         uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
//     }
//     return *this;
// }

// BreakIterator::~BreakIterator()
// {
// }

// // ------------------------------------------
// //
// // Registration
// //
// //-------------------------------------------
// #if !UCONFIG_NO_SERVICE

// // -------------------------------------

// class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
// public:
//     virtual ~ICUBreakIteratorFactory();
// protected:
//     virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
//         return BreakIterator::makeInstance(loc, kind, status);
//     }
// };

// ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}

// // -------------------------------------

// class ICUBreakIteratorService : public ICULocaleService {
// public:
//     ICUBreakIteratorService()
//         : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
//     {
//         UErrorCode status = U_ZERO_ERROR;
//         registerFactory(new ICUBreakIteratorFactory(), status);
//     }

//     virtual ~ICUBreakIteratorService();

//     virtual UObject* cloneInstance(UObject* instance) const {
//         return ((BreakIterator*)instance)->clone();
//     }

//     virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
//         LocaleKey& lkey = (LocaleKey&)key;
//         int32_t kind = lkey.kind();
//         Locale loc;
//         lkey.currentLocale(loc);
//         return BreakIterator::makeInstance(loc, kind, status);
//     }

//     virtual UBool isDefault() const {
//         return countFactories() == 1;
//     }
// };

// ICUBreakIteratorService::~ICUBreakIteratorService() {}

// // -------------------------------------

// // defined in ucln_cmn.h
// U_NAMESPACE_END

// static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
// static icu::ICULocaleService* gService = NULL;



// /**
//  * Release all static memory held by breakiterator.
//  */
// U_CDECL_BEGIN
// static UBool U_CALLCONV breakiterator_cleanup(void) {
// #if !UCONFIG_NO_SERVICE
//     if (gService) {
//         delete gService;
//         gService = NULL;
//     }
//     gInitOnceBrkiter.reset();
// #endif
//     return TRUE;
// }
// U_CDECL_END
// U_NAMESPACE_BEGIN

// static void U_CALLCONV
// initService(void) {
//     gService = new ICUBreakIteratorService();
//     ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
// }

// static ICULocaleService*
// getService(void)
// {
//     umtx_initOnce(gInitOnceBrkiter, &initService);
//     return gService;
// }


// // -------------------------------------

// static inline UBool
// hasService(void)
// {
//     return !gInitOnceBrkiter.isReset() && getService() != NULL;
// }

// // -------------------------------------

// URegistryKey U_EXPORT2
// BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
// {
//     ICULocaleService *service = getService();
//     if (service == NULL) {
//         status = U_MEMORY_ALLOCATION_ERROR;
//         return NULL;
//     }
//     return service->registerInstance(toAdopt, locale, kind, status);
// }

// // -------------------------------------

// UBool U_EXPORT2
// BreakIterator::unregister(URegistryKey key, UErrorCode& status)
// {
//     if (U_SUCCESS(status)) {
//         if (hasService()) {
//             return gService->unregister(key, status);
//         }
//         status = U_MEMORY_ALLOCATION_ERROR;
//     }
//     return FALSE;
// }

// // -------------------------------------

// StringEnumeration* U_EXPORT2
// BreakIterator::getAvailableLocales(void)
// {
//     ICULocaleService *service = getService();
//     if (service == NULL) {
//         return NULL;
//     }
//     return service->getAvailableLocales();
// }
// #endif /* UCONFIG_NO_SERVICE */

// // -------------------------------------

// BreakIterator*
// BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
// {
//     if (U_FAILURE(status)) {
//         return NULL;
//     }

// #if !UCONFIG_NO_SERVICE
//     if (hasService()) {
//         Locale actualLoc("");
//         BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
//         // TODO: The way the service code works in ICU 2.8 is that if
//         // there is a real registered break iterator, the actualLoc
//         // will be populated, but if the handleDefault path is taken
//         // (because nothing is registered that can handle the
//         // requested locale) then the actualLoc comes back empty.  In
//         // that case, the returned object already has its actual/valid
//         // locale data populated (by makeInstance, which is what
//         // handleDefault calls), so we don't touch it.  YES, A COMMENT
//         // THIS LONG is a sign of bad code -- so the action item is to
//         // revisit this in ICU 3.0 and clean it up/fix it/remove it.
//         if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
//             U_LOCALE_BASED(locBased, *result);
//             locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
//         }
//         return result;
//     }
//     else
// #endif
//     {
//         return makeInstance(loc, kind, status);
//     }
// }

// // -------------------------------------
// enum { kKeyValueLenMax = 32 };

// BreakIterator*
// BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
// {

//     if (U_FAILURE(status)) {
//         return NULL;
//     }
//     char lbType[kKeyValueLenMax];

//     BreakIterator *result = NULL;
//     switch (kind) {
//     case UBRK_CHARACTER:
//         result = BreakIterator::buildInstance(loc, "grapheme", status);
//         break;
//     case UBRK_WORD:
//         result = BreakIterator::buildInstance(loc, "word", status);
//         break;
//     case UBRK_LINE:
//         uprv_strcpy(lbType, "line");
//         {
//             char lbKeyValue[kKeyValueLenMax] = {0};
//             UErrorCode kvStatus = U_ZERO_ERROR;
//             int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
//             if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
//                 uprv_strcat(lbType, "_");
//                 uprv_strcat(lbType, lbKeyValue);
//             }
//         }
//         result = BreakIterator::buildInstance(loc, lbType, status);
//         break;
//     case UBRK_SENTENCE:
//         result = BreakIterator::buildInstance(loc, "sentence", status);
// #if !UCONFIG_NO_FILTERED_BREAK_ITERATION
//         {
//             char ssKeyValue[kKeyValueLenMax] = {0};
//             UErrorCode kvStatus = U_ZERO_ERROR;
//             int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
//             if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
//                 FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
//                 if (U_SUCCESS(kvStatus)) {
//                     result = fbiBuilder->build(result, status);
//                     delete fbiBuilder;
//                 }
//             }
//         }
// #endif
//         break;
//     case UBRK_TITLE:
//         result = BreakIterator::buildInstance(loc, "title", status);
//         break;
//     default:
//         status = U_ILLEGAL_ARGUMENT_ERROR;
//     }

//     if (U_FAILURE(status)) {
//         return NULL;
//     }

//     return result;
// }

// Locale
// BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
//     U_LOCALE_BASED(locBased, *this);
//     return locBased.getLocale(type, status);
// }

// const char *
// BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
//     U_LOCALE_BASED(locBased, *this);
//     return locBased.getLocaleID(type, status);
// }


// // This implementation of getRuleStatus is a do-nothing stub, here to
// // provide a default implementation for any derived BreakIterator classes that
// // do not implement it themselves.
// int32_t BreakIterator::getRuleStatus() const {
//     return 0;
// }

// // This implementation of getRuleStatusVec is a do-nothing stub, here to
// // provide a default implementation for any derived BreakIterator classes that
// // do not implement it themselves.
// int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
//     if (U_FAILURE(status)) {
//         return 0;
//     }
//     if (capacity < 1) {
//         status = U_BUFFER_OVERFLOW_ERROR;
//         return 1;
//     }
//     *fillInVec = 0;
//     return 1;
// }

// BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
//   U_LOCALE_BASED(locBased, (*this));
//   locBased.setLocaleIDs(valid, actual);
// }

// U_NAMESPACE_END

// #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

//eof
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// bytesinkutil.cpp
// created: 2017sep14 Markus W. Scherer



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// edits.h
// created: 2016dec30 Markus W. Scherer




#if U_SHOW_CPLUSPLUS_API



/**
 * \file
 * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
 */

U_NAMESPACE_BEGIN

class UnicodeString;

/**
 * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
 * in linear progression. Does not support moving/reordering of text.
 *
 * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
 * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
 * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
 * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
 * mapping between code points in the source and destination strings.
 *
 * After all edits have been added, instances of this class should be considered immutable, and an
 * {@link Edits::Iterator} can be used for queries.
 *
 * There are four flavors of Edits::Iterator:
 *
 * <ul>
 * <li>{@link #getFineIterator()} retains full granularity of change edits.
 * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
 * next() on the iterator, skips over no-change edits (unchanged regions).
 * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
 * edits are automatically merged during the construction phase.)
 * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
 * calling next() on the iterator, skips over no-change edits (unchanged regions).
 * </ul>
 *
 * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
 * following fine edits:
 * <ul>
 * <li>abc ⇨ abc (no-change)
 * <li>ß ⇨ ss (change)
 * <li>D ⇨ d (change)
 * <li>e ⇨ e (no-change)
 * <li>F ⇨ f (change)
 * </ul>
 * and the following coarse edits (note how adjacent change edits get merged together):
 * <ul>
 * <li>abc ⇨ abc (no-change)
 * <li>ßD ⇨ ssd (change)
 * <li>e ⇨ e (no-change)
 * <li>F ⇨ f (change)
 * </ul>
 *
 * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
 * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
 * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
 * methods are used to walk through the string.
 *
 * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
 * UCharacterCaseTest.java.
 *
 * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
 * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
 *
 * @stable ICU 59
 */
class U_COMMON_API Edits U_FINAL : public UMemory {
public:
    /**
     * Constructs an empty object.
     * @stable ICU 59
     */
    Edits() :
            array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
            errorCode_(U_ZERO_ERROR) {}
    /**
     * Copy constructor.
     * @param other source edits
     * @stable ICU 60
     */
    Edits(const Edits &other) :
            array(stackArray), capacity(STACK_CAPACITY), length(other.length),
            delta(other.delta), numChanges(other.numChanges),
            errorCode_(other.errorCode_) {
        copyArray(other);
    }
    /**
     * Move constructor, might leave src empty.
     * This object will have the same contents that the source object had.
     * @param src source edits
     * @stable ICU 60
     */
    Edits(Edits &&src) U_NOEXCEPT :
            array(stackArray), capacity(STACK_CAPACITY), length(src.length),
            delta(src.delta), numChanges(src.numChanges),
            errorCode_(src.errorCode_) {
        moveArray(src);
    }

    /**
     * Destructor.
     * @stable ICU 59
     */
    ~Edits();

    /**
     * Assignment operator.
     * @param other source edits
     * @return *this
     * @stable ICU 60
     */
    Edits &operator=(const Edits &other);

    /**
     * Move assignment operator, might leave src empty.
     * This object will have the same contents that the source object had.
     * The behavior is undefined if *this and src are the same object.
     * @param src source edits
     * @return *this
     * @stable ICU 60
     */
    Edits &operator=(Edits &&src) U_NOEXCEPT;

    /**
     * Resets the data but may not release memory.
     * @stable ICU 59
     */
    void reset() U_NOEXCEPT;

    /**
     * Adds a no-change edit: a record for an unchanged segment of text.
     * Normally called from inside ICU string transformation functions, not user code.
     * @stable ICU 59
     */
    void addUnchanged(int32_t unchangedLength);
    /**
     * Adds a change edit: a record for a text replacement/insertion/deletion.
     * Normally called from inside ICU string transformation functions, not user code.
     * @stable ICU 59
     */
    void addReplace(int32_t oldLength, int32_t newLength);
    /**
     * Sets the UErrorCode if an error occurred while recording edits.
     * Preserves older error codes in the outErrorCode.
     * Normally called from inside ICU string transformation functions, not user code.
     * @param outErrorCode Set to an error code if it does not contain one already
     *                  and an error occurred while recording edits.
     *                  Otherwise unchanged.
     * @return TRUE if U_FAILURE(outErrorCode)
     * @stable ICU 59
     */
    UBool copyErrorTo(UErrorCode &outErrorCode) const;

    /**
     * How much longer is the new text compared with the old text?
     * @return new length minus old length
     * @stable ICU 59
     */
    int32_t lengthDelta() const { return delta; }
    /**
     * @return TRUE if there are any change edits
     * @stable ICU 59
     */
    UBool hasChanges() const { return numChanges != 0; }

    /**
     * @return the number of change edits
     * @stable ICU 60
     */
    int32_t numberOfChanges() const { return numChanges; }

    /**
     * Access to the list of edits.
     *
     * At any moment in time, an instance of this class points to a single edit: a "window" into a span
     * of the source string and the corresponding span of the destination string. The source string span
     * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
     * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
     *
     * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
     * and `findDestinationIndex(int32_t, UErrorCode &)` methods.
     * Calling any of these methods mutates the iterator to make it point to the corresponding edit.
     *
     * For more information, see the documentation for {@link Edits}.
     *
     * @see getCoarseIterator
     * @see getFineIterator
     * @stable ICU 59
     */
    struct U_COMMON_API Iterator U_FINAL : public UMemory {
        /**
         * Default constructor, empty iterator.
         * @stable ICU 60
         */
        Iterator() :
                array(nullptr), index(0), length(0),
                remaining(0), onlyChanges_(FALSE), coarse(FALSE),
                dir(0), changed(FALSE), oldLength_(0), newLength_(0),
                srcIndex(0), replIndex(0), destIndex(0) {}
        /**
         * Copy constructor.
         * @stable ICU 59
         */
        Iterator(const Iterator &other) = default;
        /**
         * Assignment operator.
         * @stable ICU 59
         */
        Iterator &operator=(const Iterator &other) = default;

        /**
         * Advances the iterator to the next edit.
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return TRUE if there is another edit
         * @stable ICU 59
         */
        UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }

        /**
         * Moves the iterator to the edit that contains the source index.
         * The source index may be found in a no-change edit
         * even if normal iteration would skip no-change edits.
         * Normal iteration can continue from a found edit.
         *
         * The iterator state before this search logically does not matter.
         * (It may affect the performance of the search.)
         *
         * The iterator state after this search is undefined
         * if the source index is out of bounds for the source string.
         *
         * @param i source index
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return TRUE if the edit for the source index was found
         * @stable ICU 59
         */
        UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
            return findIndex(i, TRUE, errorCode) == 0;
        }

        /**
         * Moves the iterator to the edit that contains the destination index.
         * The destination index may be found in a no-change edit
         * even if normal iteration would skip no-change edits.
         * Normal iteration can continue from a found edit.
         *
         * The iterator state before this search logically does not matter.
         * (It may affect the performance of the search.)
         *
         * The iterator state after this search is undefined
         * if the source index is out of bounds for the source string.
         *
         * @param i destination index
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return TRUE if the edit for the destination index was found
         * @stable ICU 60
         */
        UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
            return findIndex(i, FALSE, errorCode) == 0;
        }

        /**
         * Computes the destination index corresponding to the given source index.
         * If the source index is inside a change edit (not at its start),
         * then the destination index at the end of that edit is returned,
         * since there is no information about index mapping inside a change edit.
         *
         * (This means that indexes to the start and middle of an edit,
         * for example around a grapheme cluster, are mapped to indexes
         * encompassing the entire edit.
         * The alternative, mapping an interior index to the start,
         * would map such an interval to an empty one.)
         *
         * This operation will usually but not always modify this object.
         * The iterator state after this search is undefined.
         *
         * @param i source index
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return destination index; undefined if i is not 0..string length
         * @stable ICU 60
         */
        int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);

        /**
         * Computes the source index corresponding to the given destination index.
         * If the destination index is inside a change edit (not at its start),
         * then the source index at the end of that edit is returned,
         * since there is no information about index mapping inside a change edit.
         *
         * (This means that indexes to the start and middle of an edit,
         * for example around a grapheme cluster, are mapped to indexes
         * encompassing the entire edit.
         * The alternative, mapping an interior index to the start,
         * would map such an interval to an empty one.)
         *
         * This operation will usually but not always modify this object.
         * The iterator state after this search is undefined.
         *
         * @param i destination index
         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
         *                  or else the function returns immediately. Check for U_FAILURE()
         *                  on output or use with function chaining. (See User Guide for details.)
         * @return source index; undefined if i is not 0..string length
         * @stable ICU 60
         */
        int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);

        /**
         * Returns whether the edit currently represented by the iterator is a change edit.
         *
         * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
         *         FALSE if oldLength units remain unchanged.
         * @stable ICU 59
         */
        UBool hasChange() const { return changed; }

        /**
         * The length of the current span in the source string, which starts at {@link #sourceIndex}.
         *
         * @return the number of units in the original string which are replaced or remain unchanged.
         * @stable ICU 59
         */
        int32_t oldLength() const { return oldLength_; }

        /**
         * The length of the current span in the destination string, which starts at
         * {@link #destinationIndex}, or in the replacement string, which starts at
         * {@link #replacementIndex}.
         *
         * @return the number of units in the modified string, if hasChange() is TRUE.
         *         Same as oldLength if hasChange() is FALSE.
         * @stable ICU 59
         */
        int32_t newLength() const { return newLength_; }

        /**
         * The start index of the current span in the source string; the span has length
         * {@link #oldLength}.
         *
         * @return the current index into the source string
         * @stable ICU 59
         */
        int32_t sourceIndex() const { return srcIndex; }

        /**
         * The start index of the current span in the replacement string; the span has length
         * {@link #newLength}. Well-defined only if the current edit is a change edit.
         *
         * The *replacement string* is the concatenation of all substrings of the destination
         * string corresponding to change edits.
         *
         * This method is intended to be used together with operations that write only replacement
         * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
         * The source string can then be modified in-place.
         *
         * @return the current index into the replacement-characters-only string,
         *         not counting unchanged spans
         * @stable ICU 59
         */
        int32_t replacementIndex() const {
            // TODO: Throw an exception if we aren't in a change edit?
            return replIndex;
        }

        /**
         * The start index of the current span in the destination string; the span has length
         * {@link #newLength}.
         *
         * @return the current index into the full destination string
         * @stable ICU 59
         */
        int32_t destinationIndex() const { return destIndex; }

#ifndef U_HIDE_INTERNAL_API
        /**
         * A string representation of the current edit represented by the iterator for debugging. You
         * should not depend on the contents of the return string.
         * @internal
         */
        UnicodeString& toString(UnicodeString& appendTo) const;
#endif  // U_HIDE_INTERNAL_API

    private:
        friend class Edits;

        Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);

        int32_t readLength(int32_t head);
        void updateNextIndexes();
        void updatePreviousIndexes();
        UBool noNext();
        UBool next(UBool onlyChanges, UErrorCode &errorCode);
        UBool previous(UErrorCode &errorCode);
        /** @return -1: error or i<0; 0: found; 1: i>=string length */
        int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);

        const uint16_t *array;
        int32_t index, length;
        // 0 if we are not within compressed equal-length changes.
        // Otherwise the number of remaining changes, including the current one.
        int32_t remaining;
        UBool onlyChanges_, coarse;

        int8_t dir;  // iteration direction: back(<0), initial(0), forward(>0)
        UBool changed;
        int32_t oldLength_, newLength_;
        int32_t srcIndex, replIndex, destIndex;
    };

    /**
     * Returns an Iterator for coarse-grained change edits
     * (adjacent change edits are treated as one).
     * Can be used to perform simple string updates.
     * Skips no-change edits.
     * @return an Iterator that merges adjacent changes.
     * @stable ICU 59
     */
    Iterator getCoarseChangesIterator() const {
        return Iterator(array, length, TRUE, TRUE);
    }

    /**
     * Returns an Iterator for coarse-grained change and no-change edits
     * (adjacent change edits are treated as one).
     * Can be used to perform simple string updates.
     * Adjacent change edits are treated as one edit.
     * @return an Iterator that merges adjacent changes.
     * @stable ICU 59
     */
    Iterator getCoarseIterator() const {
        return Iterator(array, length, FALSE, TRUE);
    }

    /**
     * Returns an Iterator for fine-grained change edits
     * (full granularity of change edits is retained).
     * Can be used for modifying styled text.
     * Skips no-change edits.
     * @return an Iterator that separates adjacent changes.
     * @stable ICU 59
     */
    Iterator getFineChangesIterator() const {
        return Iterator(array, length, TRUE, FALSE);
    }

    /**
     * Returns an Iterator for fine-grained change and no-change edits
     * (full granularity of change edits is retained).
     * Can be used for modifying styled text.
     * @return an Iterator that separates adjacent changes.
     * @stable ICU 59
     */
    Iterator getFineIterator() const {
        return Iterator(array, length, FALSE, FALSE);
    }

    /**
     * Merges the two input Edits and appends the result to this object.
     *
     * Consider two string transformations (for example, normalization and case mapping)
     * where each records Edits in addition to writing an output string.<br>
     * Edits ab reflect how substrings of input string a
     * map to substrings of intermediate string b.<br>
     * Edits bc reflect how substrings of intermediate string b
     * map to substrings of output string c.<br>
     * This function merges ab and bc such that the additional edits
     * recorded in this object reflect how substrings of input string a
     * map to substrings of output string c.
     *
     * If unrelated Edits are passed in where the output string of the first
     * has a different length than the input string of the second,
     * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
     *
     * @param ab reflects how substrings of input string a
     *     map to substrings of intermediate string b.
     * @param bc reflects how substrings of intermediate string b
     *     map to substrings of output string c.
     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
     *                  or else the function returns immediately. Check for U_FAILURE()
     *                  on output or use with function chaining. (See User Guide for details.)
     * @return *this, with the merged edits appended
     * @stable ICU 60
     */
    Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);

private:
    void releaseArray() U_NOEXCEPT;
    Edits &copyArray(const Edits &other);
    Edits &moveArray(Edits &src) U_NOEXCEPT;

    void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
    int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }

    void append(int32_t r);
    UBool growArray();

    static const int32_t STACK_CAPACITY = 100;
    uint16_t *array;
    int32_t capacity;
    int32_t length;
    int32_t delta;
    int32_t numChanges;
    UErrorCode errorCode_;
    uint16_t stackArray[STACK_CAPACITY];
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */





// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// bytesinkutil.h
// created: 2017sep14 Markus W. Scherer







U_NAMESPACE_BEGIN

class ByteSink;
class CharString;
class Edits;

class U_COMMON_API ByteSinkUtil {
public:
    ByteSinkUtil() = delete;  // all static

    /** (length) bytes were mapped to valid (s16, s16Length). */
    static UBool appendChange(int32_t length,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /** (length) bytes were mapped/changed to valid code point c. */
    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);

    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
                                       ByteSink &sink, Edits *edits = nullptr) {
        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
    }

    /** Append the two-byte character (U+0080..U+07FF). */
    static void appendTwoBytes(UChar32 c, ByteSink &sink);

    static UBool appendUnchanged(const uint8_t *s, int32_t length,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode) {
        if (U_FAILURE(errorCode)) { return FALSE; }
        if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
        return TRUE;
    }

    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);

private:
    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
                                        ByteSink &sink, uint32_t options, Edits *edits);
};

class U_COMMON_API CharStringByteSink : public ByteSink {
public:
    CharStringByteSink(CharString* dest);
    ~CharStringByteSink() override;

    CharStringByteSink() = delete;
    CharStringByteSink(const CharStringByteSink&) = delete;
    CharStringByteSink& operator=(const CharStringByteSink&) = delete;

    void Append(const char* bytes, int32_t n) override;

    char* GetAppendBuffer(int32_t min_capacity,
                          int32_t desired_capacity_hint,
                          char* scratch,
                          int32_t scratch_capacity,
                          int32_t* result_capacity) override;

private:
    CharString& dest_;
};

U_NAMESPACE_END

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/19/2001  aliu        Creation.
*   05/19/2010  markus      Rewritten from scratch
**********************************************************************
*/







U_NAMESPACE_BEGIN

// Windows needs us to DLL-export the MaybeStackArray template specialization,
// but MacOS X cannot handle it. Same as in digitlst.h.
#if !U_PLATFORM_IS_DARWIN_BASED
template class U_COMMON_API MaybeStackArray<char, 40>;
#endif

/**
 * ICU-internal char * string class.
 * This class does not assume or enforce any particular character encoding.
 * Raw bytes can be stored. The string object owns its characters.
 * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
 *
 * This class wants to be convenient but is also deliberately minimalist.
 * Please do not add methods if they only add minor convenience.
 * For example:
 *   cs.data()[5]='a';  // no need for setCharAt(5, 'a')
 */
class U_COMMON_API CharString : public UMemory {
public:
    CharString() : len(0) { buffer[0]=0; }
    CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, errorCode);
    }
    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, errorCode);
    }
    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, sLength, errorCode);
    }
    ~CharString() {}

    /**
     * Move constructor; might leave src in an undefined state.
     * This string will have the same contents and state that the source string had.
     */
    CharString(CharString &&src) U_NOEXCEPT;
    /**
     * Move assignment operator; might leave src in an undefined state.
     * This string will have the same contents and state that the source string had.
     * The behavior is undefined if *this and src are the same object.
     */
    CharString &operator=(CharString &&src) U_NOEXCEPT;

    /**
     * Replaces this string's contents with the other string's contents.
     * CharString does not support the standard copy constructor nor
     * the assignment operator, to make copies explicit and to
     * use a UErrorCode where memory allocations might be needed.
     */
    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);

    UBool isEmpty() const { return len==0; }
    int32_t length() const { return len; }
    char operator[](int32_t index) const { return buffer[index]; }
    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }

    const char *data() const { return buffer.getAlias(); }
    char *data() { return buffer.getAlias(); }
    /**
     * Allocates length()+1 chars and copies the NUL-terminated data().
     * The caller must uprv_free() the result.
     */
    char *cloneData(UErrorCode &errorCode) const;

    bool operator==(StringPiece other) const {
        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
    }
    bool operator!=(StringPiece other) const {
        return !operator==(other);
    }

    /** @return last index of c, or -1 if c is not in this string */
    int32_t lastIndexOf(char c) const;

    bool contains(StringPiece s) const;

    CharString &clear() { len=0; buffer[0]=0; return *this; }
    CharString &truncate(int32_t newLength);

    CharString &append(char c, UErrorCode &errorCode);
    CharString &append(StringPiece s, UErrorCode &errorCode) {
        return append(s.data(), s.length(), errorCode);
    }
    CharString &append(const CharString &s, UErrorCode &errorCode) {
        return append(s.data(), s.length(), errorCode);
    }
    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
    /**
     * Returns a writable buffer for appending and writes the buffer's capacity to
     * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
     * There will additionally be space for a terminating NUL right at resultCapacity.
     * (This function is similar to ByteSink.GetAppendBuffer().)
     *
     * The returned buffer is only valid until the next write operation
     * on this string.
     *
     * After writing at most resultCapacity bytes, call append() with the
     * pointer returned from this function and the number of bytes written.
     *
     * @param minCapacity required minimum capacity of the returned buffer;
     *                    must be non-negative
     * @param desiredCapacityHint desired capacity of the returned buffer;
     *                            must be non-negative
     * @param resultCapacity will be set to the capacity of the returned buffer
     * @param errorCode in/out error code
     * @return a buffer with resultCapacity>=min_capacity
     */
    char *getAppendBuffer(int32_t minCapacity,
                          int32_t desiredCapacityHint,
                          int32_t &resultCapacity,
                          UErrorCode &errorCode);

    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
    CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);

    /**
     * Appends a filename/path part, e.g., a directory name.
     * First appends a U_FILE_SEP_CHAR if necessary.
     * Does nothing if s is empty.
     */
    CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);

    /**
     * Appends a U_FILE_SEP_CHAR if this string is not empty
     * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
     */
    CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);

private:
    MaybeStackArray<char, 40> buffer;
    int32_t len;

    UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);

    CharString(const CharString &other); // forbid copying of this class
    CharString &operator=(const CharString &other); // forbid copying of this class
};

U_NAMESPACE_END





U_NAMESPACE_BEGIN

UBool
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    char scratch[200];
    int32_t s8Length = 0;
    for (int32_t i = 0; i < s16Length;) {
        int32_t capacity;
        int32_t desiredCapacity = s16Length - i;
        if (desiredCapacity < (INT32_MAX / 3)) {
            desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
        } else if (desiredCapacity < (INT32_MAX / 2)) {
            desiredCapacity *= 2;
        } else {
            desiredCapacity = INT32_MAX;
        }
        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
        capacity -= U8_MAX_LENGTH - 1;
        int32_t j = 0;
        for (; i < s16Length && j < capacity;) {
            UChar32 c;
            U16_NEXT_UNSAFE(s16, i, c);
            U8_APPEND_UNSAFE(buffer, j, c);
        }
        if (j > (INT32_MAX - s8Length)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return FALSE;
        }
        sink.Append(buffer, j);
        s8Length += j;
    }
    if (edits != nullptr) {
        edits->addReplace(length, s8Length);
    }
    return TRUE;
}

UBool
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
                           const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    if ((limit - s) > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
}

void
ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
    char s8[U8_MAX_LENGTH];
    int32_t s8Length = 0;
    U8_APPEND_UNSAFE(s8, s8Length, c);
    if (edits != nullptr) {
        edits->addReplace(length, s8Length);
    }
    sink.Append(s8, s8Length);
}

// See unicode/utf8.h U8_APPEND_UNSAFE().
inline uint8_t bytesinkutil_getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
inline uint8_t bytesinkutil_getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }

void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
    char s8[2] = { (char)bytesinkutil_getTwoByteLead(c), (char)bytesinkutil_getTwoByteTrail(c) };
    sink.Append(s8, 2);
}

void
ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
                                      ByteSink &sink, uint32_t options, Edits *edits) {
    U_ASSERT(length > 0);
    if (edits != nullptr) {
        edits->addUnchanged(length);
    }
    if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
        sink.Append(reinterpret_cast<const char *>(s), length);
    }
}

UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    if ((limit - s) > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    int32_t length = (int32_t)(limit - s);
    if (length > 0) {
        appendNonEmptyUnchanged(s, length, sink, options, edits);
    }
    return TRUE;
}

CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
}

CharStringByteSink::~CharStringByteSink() = default;

void
CharStringByteSink::Append(const char* bytes, int32_t n) {
    UErrorCode status = U_ZERO_ERROR;
    dest_.append(bytes, n, status);
    // Any errors are silently ignored.
}

char*
CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
                                    int32_t desired_capacity_hint,
                                    char* scratch,
                                    int32_t scratch_capacity,
                                    int32_t* result_capacity) {
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
        *result_capacity = 0;
        return nullptr;
    }

    UErrorCode status = U_ZERO_ERROR;
    char* result = dest_.getAppendBuffer(
            min_capacity,
            desired_capacity_hint,
            *result_capacity,
            status);
    if (U_SUCCESS(status)) {
        return result;
    }

    *result_capacity = scratch_capacity;
    return scratch;
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2011, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)





U_NAMESPACE_BEGIN

ByteSink::~ByteSink() {}

char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t /*desired_capacity_hint*/,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) {
  if (min_capacity < 1 || scratch_capacity < min_capacity) {
    *result_capacity = 0;
    return NULL;
  }
  *result_capacity = scratch_capacity;
  return scratch;
}

void ByteSink::Flush() {}

CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
    : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
      size_(0), appended_(0), overflowed_(FALSE) {
}

CheckedArrayByteSink::~CheckedArrayByteSink() {}

CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
  size_ = appended_ = 0;
  overflowed_ = FALSE;
  return *this;
}

void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
  if (n <= 0) {
    return;
  }
  if (n > (INT32_MAX - appended_)) {
    // TODO: Report as integer overflow, not merely buffer overflow.
    appended_ = INT32_MAX;
    overflowed_ = TRUE;
    return;
  }
  appended_ += n;
  int32_t available = capacity_ - size_;
  if (n > available) {
    n = available;
    overflowed_ = TRUE;
  }
  if (n > 0 && bytes != (outbuf_ + size_)) {
    uprv_memcpy(outbuf_ + size_, bytes, n);
  }
  size_ += n;
}

char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t /*desired_capacity_hint*/,
                                            char* scratch,
                                            int32_t scratch_capacity,
                                            int32_t* result_capacity) {
  if (min_capacity < 1 || scratch_capacity < min_capacity) {
    *result_capacity = 0;
    return NULL;
  }
  int32_t available = capacity_ - size_;
  if (available >= min_capacity) {
    *result_capacity = available;
    return outbuf_ + size_;
  } else {
    *result_capacity = scratch_capacity;
    return scratch;
  }
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestrie.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestrie.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/


/**
 * \file
 * \brief C++ API: Trie for mapping byte sequences to integer values.
 */



#if U_SHOW_CPLUSPLUS_API



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  udicttrie.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010dec17
*   created by: Markus W. Scherer
*/


/**
 * \file
 * \brief C API: Helper definitions for dictionary trie APIs.
 */




/**
 * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
 * @see USTRINGTRIE_MATCHES
 * @see USTRINGTRIE_HAS_VALUE
 * @see USTRINGTRIE_HAS_NEXT
 * @stable ICU 4.8
 */
enum UStringTrieResult {
    /**
     * The input unit(s) did not continue a matching string.
     * Once current()/next() return USTRINGTRIE_NO_MATCH,
     * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
     * until the trie is reset to its original state or to a saved state.
     * @stable ICU 4.8
     */
    USTRINGTRIE_NO_MATCH,
    /**
     * The input unit(s) continued a matching string
     * but there is no value for the string so far.
     * (It is a prefix of a longer string.)
     * @stable ICU 4.8
     */
    USTRINGTRIE_NO_VALUE,
    /**
     * The input unit(s) continued a matching string
     * and there is a value for the string so far.
     * This value will be returned by getValue().
     * No further input byte/unit can continue a matching string.
     * @stable ICU 4.8
     */
    USTRINGTRIE_FINAL_VALUE,
    /**
     * The input unit(s) continued a matching string
     * and there is a value for the string so far.
     * This value will be returned by getValue().
     * Another input byte/unit can continue a matching string.
     * @stable ICU 4.8
     */
    USTRINGTRIE_INTERMEDIATE_VALUE
};

/**
 * Same as (result!=USTRINGTRIE_NO_MATCH).
 * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
 * @return true if the input bytes/units so far are part of a matching string/byte sequence.
 * @stable ICU 4.8
 */
#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)

/**
 * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
 * this macro evaluates result exactly once.
 * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
 * @return true if there is a value for the input bytes/units so far.
 * @see BytesTrie::getValue
 * @see UCharsTrie::getValue
 * @stable ICU 4.8
 */
#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)

/**
 * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
 * this macro evaluates result exactly once.
 * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
 * @return true if another input byte/unit can continue a matching string.
 * @stable ICU 4.8
 */
#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)



U_NAMESPACE_BEGIN

class ByteSink;
class BytesTrieBuilder;
class CharString;
class UVector32;

/**
 * Light-weight, non-const reader class for a BytesTrie.
 * Traverses a byte-serialized data structure with minimal state,
 * for mapping byte sequences to non-negative integer values.
 *
 * This class owns the serialized trie data only if it was constructed by
 * the builder's build() method.
 * The public constructor and the copy constructor only alias the data (only copy the pointer).
 * There is no assignment operator.
 *
 * This class is not intended for public subclassing.
 * @stable ICU 4.8
 */
class U_COMMON_API BytesTrie : public UMemory {
public:
    /**
     * Constructs a BytesTrie reader instance.
     *
     * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
     * starting with the first byte of that sequence.
     * The BytesTrie object will not read more bytes than
     * the BytesTrieBuilder generated in the corresponding build() call.
     *
     * The array is not copied/cloned and must not be modified while
     * the BytesTrie object is in use.
     *
     * @param trieBytes The byte array that contains the serialized trie.
     * @stable ICU 4.8
     */
    BytesTrie(const void *trieBytes)
            : ownedArray_(NULL), bytes_(static_cast<const uint8_t *>(trieBytes)),
              pos_(bytes_), remainingMatchLength_(-1) {}

    /**
     * Destructor.
     * @stable ICU 4.8
     */
    ~BytesTrie();

    /**
     * Copy constructor, copies the other trie reader object and its state,
     * but not the byte array which will be shared. (Shallow copy.)
     * @param other Another BytesTrie object.
     * @stable ICU 4.8
     */
    BytesTrie(const BytesTrie &other)
            : ownedArray_(NULL), bytes_(other.bytes_),
              pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}

    /**
     * Resets this trie to its initial state.
     * @return *this
     * @stable ICU 4.8
     */
    BytesTrie &reset() {
        pos_=bytes_;
        remainingMatchLength_=-1;
        return *this;
    }

#ifndef U_HIDE_DRAFT_API
    /**
     * Returns the state of this trie as a 64-bit integer.
     * The state value is never 0.
     *
     * @return opaque state value
     * @see resetToState64
     * @draft ICU 65
     */
    uint64_t getState64() const {
        return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
            (uint64_t)(pos_ - bytes_);
    }

    /**
     * Resets this trie to the saved state.
     * Unlike resetToState(State), the 64-bit state value
     * must be from getState64() from the same trie object or
     * from one initialized the exact same way.
     * Because of no validation, this method is faster.
     *
     * @param state The opaque trie state value from getState64().
     * @return *this
     * @see getState64
     * @see resetToState
     * @see reset
     * @draft ICU 65
     */
    BytesTrie &resetToState64(uint64_t state) {
        remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
        pos_ = bytes_ + (state & kState64PosMask);
        return *this;
    }
#endif  /* U_HIDE_DRAFT_API */

    /**
     * BytesTrie state object, for saving a trie's current state
     * and resetting the trie back to this state later.
     * @stable ICU 4.8
     */
    class State : public UMemory {
    public:
        /**
         * Constructs an empty State.
         * @stable ICU 4.8
         */
        State() { bytes=NULL; }
    private:
        friend class BytesTrie;

        const uint8_t *bytes;
        const uint8_t *pos;
        int32_t remainingMatchLength;
    };

    /**
     * Saves the state of this trie.
     * @param state The State object to hold the trie's state.
     * @return *this
     * @see resetToState
     * @stable ICU 4.8
     */
    const BytesTrie &saveState(State &state) const {
        state.bytes=bytes_;
        state.pos=pos_;
        state.remainingMatchLength=remainingMatchLength_;
        return *this;
    }

    /**
     * Resets this trie to the saved state.
     * If the state object contains no state, or the state of a different trie,
     * then this trie remains unchanged.
     * @param state The State object which holds a saved trie state.
     * @return *this
     * @see saveState
     * @see reset
     * @stable ICU 4.8
     */
    BytesTrie &resetToState(const State &state) {
        if(bytes_==state.bytes && bytes_!=NULL) {
            pos_=state.pos;
            remainingMatchLength_=state.remainingMatchLength;
        }
        return *this;
    }

    /**
     * Determines whether the byte sequence so far matches, whether it has a value,
     * and whether another input byte can continue a matching byte sequence.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    UStringTrieResult current() const;

    /**
     * Traverses the trie from the initial state for this input byte.
     * Equivalent to reset().next(inByte).
     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
     *               Values below -0x100 and above 0xff will never match.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    inline UStringTrieResult first(int32_t inByte) {
        remainingMatchLength_=-1;
        if(inByte<0) {
            inByte+=0x100;
        }
        return nextImpl(bytes_, inByte);
    }

    /**
     * Traverses the trie from the current state for this input byte.
     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
     *               Values below -0x100 and above 0xff will never match.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    UStringTrieResult next(int32_t inByte);

    /**
     * Traverses the trie from the current state for this byte sequence.
     * Equivalent to
     * \code
     * Result result=current();
     * for(each c in s)
     *   if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
     *   result=next(c);
     * return result;
     * \endcode
     * @param s A string or byte sequence. Can be NULL if length is 0.
     * @param length The length of the byte sequence. Can be -1 if NUL-terminated.
     * @return The match/value Result.
     * @stable ICU 4.8
     */
    UStringTrieResult next(const char *s, int32_t length);

    /**
     * Returns a matching byte sequence's value if called immediately after
     * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
     * getValue() can be called multiple times.
     *
     * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
     * @return The value for the byte sequence so far.
     * @stable ICU 4.8
     */
    inline int32_t getValue() const {
        const uint8_t *pos=pos_;
        int32_t leadByte=*pos++;
        // U_ASSERT(leadByte>=kMinValueLead);
        return readValue(pos, leadByte>>1);
    }

    /**
     * Determines whether all byte sequences reachable from the current state
     * map to the same value.
     * @param uniqueValue Receives the unique value, if this function returns TRUE.
     *                    (output-only)
     * @return TRUE if all byte sequences reachable from the current state
     *         map to the same value.
     * @stable ICU 4.8
     */
    inline UBool hasUniqueValue(int32_t &uniqueValue) const {
        const uint8_t *pos=pos_;
        // Skip the rest of a pending linear-match node.
        return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
    }

    /**
     * Finds each byte which continues the byte sequence from the current state.
     * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
     * @param out Each next byte is appended to this object.
     *            (Only uses the out.Append(s, length) method.)
     * @return the number of bytes which continue the byte sequence from here
     * @stable ICU 4.8
     */
    int32_t getNextBytes(ByteSink &out) const;

    /**
     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
     * @stable ICU 4.8
     */
    class U_COMMON_API Iterator : public UMemory {
    public:
        /**
         * Iterates from the root of a byte-serialized BytesTrie.
         * @param trieBytes The trie bytes.
         * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
         *                        Otherwise, the iterator returns strings with this maximum length.
         * @param errorCode Standard ICU error code. Its input value must
         *                  pass the U_SUCCESS() test, or else the function returns
         *                  immediately. Check for U_FAILURE() on output or use with
         *                  function chaining. (See User Guide for details.)
         * @stable ICU 4.8
         */
        Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);

        /**
         * Iterates from the current state of the specified BytesTrie.
         * @param trie The trie whose state will be copied for iteration.
         * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
         *                        Otherwise, the iterator returns strings with this maximum length.
         * @param errorCode Standard ICU error code. Its input value must
         *                  pass the U_SUCCESS() test, or else the function returns
         *                  immediately. Check for U_FAILURE() on output or use with
         *                  function chaining. (See User Guide for details.)
         * @stable ICU 4.8
         */
        Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);

        /**
         * Destructor.
         * @stable ICU 4.8
         */
        ~Iterator();

        /**
         * Resets this iterator to its initial state.
         * @return *this
         * @stable ICU 4.8
         */
        Iterator &reset();

        /**
         * @return TRUE if there are more elements.
         * @stable ICU 4.8
         */
        UBool hasNext() const;

        /**
         * Finds the next (byte sequence, value) pair if there is one.
         *
         * If the byte sequence is truncated to the maximum length and does not
         * have a real value, then the value is set to -1.
         * In this case, this "not a real value" is indistinguishable from
         * a real value of -1.
         * @param errorCode Standard ICU error code. Its input value must
         *                  pass the U_SUCCESS() test, or else the function returns
         *                  immediately. Check for U_FAILURE() on output or use with
         *                  function chaining. (See User Guide for details.)
         * @return TRUE if there is another element.
         * @stable ICU 4.8
         */
        UBool next(UErrorCode &errorCode);

        /**
         * @return The NUL-terminated byte sequence for the last successful next().
         * @stable ICU 4.8
         */
        StringPiece getString() const;
        /**
         * @return The value for the last successful next().
         * @stable ICU 4.8
         */
        int32_t getValue() const { return value_; }

    private:
        UBool truncateAndStop();

        const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);

        const uint8_t *bytes_;
        const uint8_t *pos_;
        const uint8_t *initialPos_;
        int32_t remainingMatchLength_;
        int32_t initialRemainingMatchLength_;

        CharString *str_;
        int32_t maxLength_;
        int32_t value_;

        // The stack stores pairs of integers for backtracking to another
        // outbound edge of a branch node.
        // The first integer is an offset from bytes_.
        // The second integer has the str_->length() from before the node in bits 15..0,
        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
        // but the code looks more confusing that way.)
        UVector32 *stack_;
    };

private:
    friend class BytesTrieBuilder;

    /**
     * Constructs a BytesTrie reader instance.
     * Unlike the public constructor which just aliases an array,
     * this constructor adopts the builder's array.
     * This constructor is only called by the builder.
     */
    BytesTrie(void *adoptBytes, const void *trieBytes)
            : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
              bytes_(static_cast<const uint8_t *>(trieBytes)),
              pos_(bytes_), remainingMatchLength_(-1) {}

    // No assignment operator.
    BytesTrie &operator=(const BytesTrie &other);

    inline void stop() {
        pos_=NULL;
    }

    // Reads a compact 32-bit integer.
    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
    static int32_t readValue(const uint8_t *pos, int32_t leadByte);
    static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
        // U_ASSERT(leadByte>=kMinValueLead);
        if(leadByte>=(kMinTwoByteValueLead<<1)) {
            if(leadByte<(kMinThreeByteValueLead<<1)) {
                ++pos;
            } else if(leadByte<(kFourByteValueLead<<1)) {
                pos+=2;
            } else {
                pos+=3+((leadByte>>1)&1);
            }
        }
        return pos;
    }
    static inline const uint8_t *skipValue(const uint8_t *pos) {
        int32_t leadByte=*pos++;
        return skipValue(pos, leadByte);
    }

    // Reads a jump delta and jumps.
    static const uint8_t *jumpByDelta(const uint8_t *pos);

    static inline const uint8_t *skipDelta(const uint8_t *pos) {
        int32_t delta=*pos++;
        if(delta>=kMinTwoByteDeltaLead) {
            if(delta<kMinThreeByteDeltaLead) {
                ++pos;
            } else if(delta<kFourByteDeltaLead) {
                pos+=2;
            } else {
                pos+=3+(delta&1);
            }
        }
        return pos;
    }

    static inline UStringTrieResult valueResult(int32_t node) {
        return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
    }

    // Handles a branch node for both next(byte) and next(string).
    UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);

    // Requires remainingLength_<0.
    UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);

    // Helper functions for hasUniqueValue().
    // Recursively finds a unique value (or whether there is not a unique one)
    // from a branch.
    static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
                                                    UBool haveUniqueValue, int32_t &uniqueValue);
    // Recursively finds a unique value (or whether there is not a unique one)
    // starting from a position on a node lead byte.
    static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);

    // Helper functions for getNextBytes().
    // getNextBytes() when pos is on a branch node.
    static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
    static void append(ByteSink &out, int c);

    // BytesTrie data structure
    //
    // The trie consists of a series of byte-serialized nodes for incremental
    // string/byte sequence matching. The root node is at the beginning of the trie data.
    //
    // Types of nodes are distinguished by their node lead byte ranges.
    // After each node, except a final-value node, another node follows to
    // encode match values or continue matching further bytes.
    //
    // Node types:
    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
    //    The value is for the string/byte sequence so far.
    //    One node bit indicates whether the value is final or whether
    //    matching continues with the next node.
    //  - Linear-match node: Matches a number of bytes.
    //  - Branch node: Branches to other nodes according to the current input byte.
    //    The node byte is the length of the branch (number of bytes to select from)
    //    minus 1. It is followed by a sub-node:
    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
    //      there are length-1 (key, value) pairs and then one more comparison byte.
    //      If one of the key bytes matches, then the value is either a final value for
    //      the string/byte sequence so far, or a "jump" delta to the next node.
    //      If the last byte matches, then matching continues with the next node.
    //      (Values have the same encoding as value nodes.)
    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
    //      there is one byte and one "jump" delta.
    //      If the input byte is less than the sub-node byte, then "jump" by delta to
    //      the next sub-node which will have a length of length/2.
    //      (The delta has its own compact encoding.)
    //      Otherwise, skip the "jump" delta to the next sub-node
    //      which will have a length of length-length/2.

    // Node lead byte values.

    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
    // the length is one more than the next byte.

    // For a branch sub-node with at most this many entries, we drop down
    // to a linear search.
    static const int32_t kMaxBranchLinearSubNodeLength=5;

    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
    static const int32_t kMinLinearMatch=0x10;
    static const int32_t kMaxLinearMatchLength=0x10;

    // 20..ff: Variable-length value node.
    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
    // Then shift-right by 1 bit.
    // The remaining lead byte value indicates the number of following bytes (0..4)
    // and contains the value's top bits.
    static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
    // It is a final value if bit 0 is set.
    static const int32_t kValueIsFinal=1;

    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
    static const int32_t kMinOneByteValueLead=kMinValueLead/2;  // 0x10
    static const int32_t kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.

    static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
    static const int32_t kMaxTwoByteValue=0x1aff;

    static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
    static const int32_t kFourByteValueLead=0x7e;

    // A little more than Unicode code points. (0x11ffff)
    static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;

    static const int32_t kFiveByteValueLead=0x7f;

    // Compact delta integers.
    static const int32_t kMaxOneByteDelta=0xbf;
    static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
    static const int32_t kMinThreeByteDeltaLead=0xf0;
    static const int32_t kFourByteDeltaLead=0xfe;
    static const int32_t kFiveByteDeltaLead=0xff;

    static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
    static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff

    // For getState64():
    // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
    // so we need at least 5 bits for that.
    // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
    static constexpr int32_t kState64RemainingShift = 59;
    static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;

    uint8_t *ownedArray_;

    // Fixed value referencing the BytesTrie bytes.
    const uint8_t *bytes_;

    // Iterator variables.

    // Pointer to next trie byte to read. NULL if no more matches.
    const uint8_t *pos_;
    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
    int32_t remainingMatchLength_;
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */






U_NAMESPACE_BEGIN

BytesTrie::~BytesTrie() {
    uprv_free(ownedArray_);
}

// lead byte already shifted right by 1.
int32_t
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
    int32_t value;
    if(leadByte<kMinTwoByteValueLead) {
        value=leadByte-kMinOneByteValueLead;
    } else if(leadByte<kMinThreeByteValueLead) {
        value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
    } else if(leadByte<kFourByteValueLead) {
        value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
    } else if(leadByte==kFourByteValueLead) {
        value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
    } else {
        value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
    }
    return value;
}

const uint8_t *
BytesTrie::jumpByDelta(const uint8_t *pos) {
    int32_t delta=*pos++;
    if(delta<kMinTwoByteDeltaLead) {
        // nothing to do
    } else if(delta<kMinThreeByteDeltaLead) {
        delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
    } else if(delta<kFourByteDeltaLead) {
        delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
        pos+=2;
    } else if(delta==kFourByteDeltaLead) {
        delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
        pos+=3;
    } else {
        delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
        pos+=4;
    }
    return pos+delta;
}

UStringTrieResult
BytesTrie::current() const {
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        return USTRINGTRIE_NO_MATCH;
    } else {
        int32_t node;
        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
                valueResult(node) : USTRINGTRIE_NO_VALUE;
    }
}

UStringTrieResult
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
    // Branch according to the current byte.
    if(length==0) {
        length=*pos++;
    }
    ++length;
    // The length of the branch is the number of bytes to select from.
    // The data structure encodes a binary search.
    while(length>kMaxBranchLinearSubNodeLength) {
        if(inByte<*pos++) {
            length>>=1;
            pos=jumpByDelta(pos);
        } else {
            length=length-(length>>1);
            pos=skipDelta(pos);
        }
    }
    // Drop down to linear search for the last few bytes.
    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
    // and divides length by 2.
    do {
        if(inByte==*pos++) {
            UStringTrieResult result;
            int32_t node=*pos;
            U_ASSERT(node>=kMinValueLead);
            if(node&kValueIsFinal) {
                // Leave the final value for getValue() to read.
                result=USTRINGTRIE_FINAL_VALUE;
            } else {
                // Use the non-final value as the jump delta.
                ++pos;
                // int32_t delta=readValue(pos, node>>1);
                node>>=1;
                int32_t delta;
                if(node<kMinTwoByteValueLead) {
                    delta=node-kMinOneByteValueLead;
                } else if(node<kMinThreeByteValueLead) {
                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
                } else if(node<kFourByteValueLead) {
                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
                    pos+=2;
                } else if(node==kFourByteValueLead) {
                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
                    pos+=3;
                } else {
                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
                    pos+=4;
                }
                // end readValue()
                pos+=delta;
                node=*pos;
                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
            }
            pos_=pos;
            return result;
        }
        --length;
        pos=skipValue(pos);
    } while(length>1);
    if(inByte==*pos++) {
        pos_=pos;
        int32_t node=*pos;
        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
    } else {
        stop();
        return USTRINGTRIE_NO_MATCH;
    }
}

UStringTrieResult
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
    for(;;) {
        int32_t node=*pos++;
        if(node<kMinLinearMatch) {
            return branchNext(pos, node, inByte);
        } else if(node<kMinValueLead) {
            // Match the first of length+1 bytes.
            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
            if(inByte==*pos++) {
                remainingMatchLength_=--length;
                pos_=pos;
                return (length<0 && (node=*pos)>=kMinValueLead) ?
                        valueResult(node) : USTRINGTRIE_NO_VALUE;
            } else {
                // No match.
                break;
            }
        } else if(node&kValueIsFinal) {
            // No further matching bytes.
            break;
        } else {
            // Skip intermediate value.
            pos=skipValue(pos, node);
            // The next node must not also be a value node.
            U_ASSERT(*pos<kMinValueLead);
        }
    }
    stop();
    return USTRINGTRIE_NO_MATCH;
}

UStringTrieResult
BytesTrie::next(int32_t inByte) {
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(inByte<0) {
        inByte+=0x100;
    }
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    if(length>=0) {
        // Remaining part of a linear-match node.
        if(inByte==*pos++) {
            remainingMatchLength_=--length;
            pos_=pos;
            int32_t node;
            return (length<0 && (node=*pos)>=kMinValueLead) ?
                    valueResult(node) : USTRINGTRIE_NO_VALUE;
        } else {
            stop();
            return USTRINGTRIE_NO_MATCH;
        }
    }
    return nextImpl(pos, inByte);
}

UStringTrieResult
BytesTrie::next(const char *s, int32_t sLength) {
    if(sLength<0 ? *s==0 : sLength==0) {
        // Empty input.
        return current();
    }
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    for(;;) {
        // Fetch the next input byte, if there is one.
        // Continue a linear-match node without rechecking sLength<0.
        int32_t inByte;
        if(sLength<0) {
            for(;;) {
                if((inByte=*s++)==0) {
                    remainingMatchLength_=length;
                    pos_=pos;
                    int32_t node;
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
                }
                if(length<0) {
                    remainingMatchLength_=length;
                    break;
                }
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
            }
        } else {
            for(;;) {
                if(sLength==0) {
                    remainingMatchLength_=length;
                    pos_=pos;
                    int32_t node;
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
                }
                inByte=*s++;
                --sLength;
                if(length<0) {
                    remainingMatchLength_=length;
                    break;
                }
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
            }
        }
        for(;;) {
            int32_t node=*pos++;
            if(node<kMinLinearMatch) {
                UStringTrieResult result=branchNext(pos, node, inByte);
                if(result==USTRINGTRIE_NO_MATCH) {
                    return USTRINGTRIE_NO_MATCH;
                }
                // Fetch the next input byte, if there is one.
                if(sLength<0) {
                    if((inByte=*s++)==0) {
                        return result;
                    }
                } else {
                    if(sLength==0) {
                        return result;
                    }
                    inByte=*s++;
                    --sLength;
                }
                if(result==USTRINGTRIE_FINAL_VALUE) {
                    // No further matching bytes.
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
            } else if(node<kMinValueLead) {
                // Match length+1 bytes.
                length=node-kMinLinearMatch;  // Actual match length minus 1.
                if(inByte!=*pos) {
                    stop();
                    return USTRINGTRIE_NO_MATCH;
                }
                ++pos;
                --length;
                break;
            } else if(node&kValueIsFinal) {
                // No further matching bytes.
                stop();
                return USTRINGTRIE_NO_MATCH;
            } else {
                // Skip intermediate value.
                pos=skipValue(pos, node);
                // The next node must not also be a value node.
                U_ASSERT(*pos<kMinValueLead);
            }
        }
    }
}

const uint8_t *
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
                                     UBool haveUniqueValue, int32_t &uniqueValue) {
    while(length>kMaxBranchLinearSubNodeLength) {
        ++pos;  // ignore the comparison byte
        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
            return NULL;
        }
        length=length-(length>>1);
        pos=skipDelta(pos);
    }
    do {
        ++pos;  // ignore a comparison byte
        // handle its value
        int32_t node=*pos++;
        UBool isFinal=(UBool)(node&kValueIsFinal);
        int32_t value=readValue(pos, node>>1);
        pos=skipValue(pos, node);
        if(isFinal) {
            if(haveUniqueValue) {
                if(value!=uniqueValue) {
                    return NULL;
                }
            } else {
                uniqueValue=value;
                haveUniqueValue=TRUE;
            }
        } else {
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
                return NULL;
            }
            haveUniqueValue=TRUE;
        }
    } while(--length>1);
    return pos+1;  // ignore the last comparison byte
}

UBool
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
    for(;;) {
        int32_t node=*pos++;
        if(node<kMinLinearMatch) {
            if(node==0) {
                node=*pos++;
            }
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
            if(pos==NULL) {
                return FALSE;
            }
            haveUniqueValue=TRUE;
        } else if(node<kMinValueLead) {
            // linear-match node
            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
        } else {
            UBool isFinal=(UBool)(node&kValueIsFinal);
            int32_t value=readValue(pos, node>>1);
            if(haveUniqueValue) {
                if(value!=uniqueValue) {
                    return FALSE;
                }
            } else {
                uniqueValue=value;
                haveUniqueValue=TRUE;
            }
            if(isFinal) {
                return TRUE;
            }
            pos=skipValue(pos, node);
        }
    }
}

int32_t
BytesTrie::getNextBytes(ByteSink &out) const {
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        return 0;
    }
    if(remainingMatchLength_>=0) {
        append(out, *pos);  // Next byte of a pending linear-match node.
        return 1;
    }
    int32_t node=*pos++;
    if(node>=kMinValueLead) {
        if(node&kValueIsFinal) {
            return 0;
        } else {
            pos=skipValue(pos, node);
            node=*pos++;
            U_ASSERT(node<kMinValueLead);
        }
    }
    if(node<kMinLinearMatch) {
        if(node==0) {
            node=*pos++;
        }
        getNextBranchBytes(pos, ++node, out);
        return node;
    } else {
        // First byte of the linear-match node.
        append(out, *pos);
        return 1;
    }
}

void
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
    while(length>kMaxBranchLinearSubNodeLength) {
        ++pos;  // ignore the comparison byte
        getNextBranchBytes(jumpByDelta(pos), length>>1, out);
        length=length-(length>>1);
        pos=skipDelta(pos);
    }
    do {
        append(out, *pos++);
        pos=skipValue(pos);
    } while(--length>1);
    append(out, *pos);
}

void
BytesTrie::append(ByteSink &out, int c) {
    char ch=(char)c;
    out.Append(&ch, 1);
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestriebuilder.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestriebuilder.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C++ API: Builder for icu::BytesTrie
 */




#if U_SHOW_CPLUSPLUS_API



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012,2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  stringtriebuilder.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010dec24
*   created by: Markus W. Scherer
*/




#if U_SHOW_CPLUSPLUS_API



/**
 * \file
 * \brief C++ API: Builder API for trie builders
 */

// Forward declaration.
/// \cond
struct UHashtable;
typedef struct UHashtable UHashtable;
/// \endcond

/**
 * Build options for BytesTrieBuilder and CharsTrieBuilder.
 * @stable ICU 4.8
 */
enum UStringTrieBuildOption {
    /**
     * Builds a trie quickly.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_FAST,
    /**
     * Builds a trie more slowly, attempting to generate
     * a shorter but equivalent serialization.
     * This build option also uses more memory.
     *
     * This option can be effective when many integer values are the same
     * and string/byte sequence suffixes can be shared.
     * Runtime speed is not expected to improve.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_SMALL
};

U_NAMESPACE_BEGIN

/**
 * Base class for string trie builder classes.
 *
 * This class is not intended for public subclassing.
 * @stable ICU 4.8
 */
class U_COMMON_API StringTrieBuilder : public UObject {
public:
#ifndef U_HIDE_INTERNAL_API
    /** @internal */
    static int32_t hashNode(const void *node);
    /** @internal */
    static UBool equalNodes(const void *left, const void *right);
#endif  /* U_HIDE_INTERNAL_API */

protected:
    // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
    // or else the compiler will create a public default constructor.
    /** @internal */
    StringTrieBuilder();
    /** @internal */
    virtual ~StringTrieBuilder();

#ifndef U_HIDE_INTERNAL_API
    /** @internal */
    void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
    /** @internal */
    void deleteCompactBuilder();

    /** @internal */
    void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);

    /** @internal */
    int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
    /** @internal */
    int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
#endif  /* U_HIDE_INTERNAL_API */

    class Node;

#ifndef U_HIDE_INTERNAL_API
    /** @internal */
    Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
    /** @internal */
    Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
                            int32_t length, UErrorCode &errorCode);
#endif  /* U_HIDE_INTERNAL_API */

    /** @internal */
    virtual int32_t getElementStringLength(int32_t i) const = 0;
    /** @internal */
    virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
    /** @internal */
    virtual int32_t getElementValue(int32_t i) const = 0;

    // Finds the first unit index after this one where
    // the first and last element have different units again.
    /** @internal */
    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;

    // Number of different units at unitIndex.
    /** @internal */
    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
    /** @internal */
    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
    /** @internal */
    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;

    /** @internal */
    virtual UBool matchNodesCanHaveValues() const = 0;

    /** @internal */
    virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
    /** @internal */
    virtual int32_t getMinLinearMatch() const = 0;
    /** @internal */
    virtual int32_t getMaxLinearMatchLength() const = 0;

#ifndef U_HIDE_INTERNAL_API
    // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
    /** @internal */
    static const int32_t kMaxBranchLinearSubNodeLength=5;

    // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
    // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
    /** @internal */
    static const int32_t kMaxSplitBranchLevels=14;

    /**
     * Makes sure that there is only one unique node registered that is
     * equivalent to newNode.
     * @param newNode Input node. The builder takes ownership.
     * @param errorCode ICU in/out UErrorCode.
                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
     * @return newNode if it is the first of its kind, or
     *         an equivalent node if newNode is a duplicate.
     * @internal
     */
    Node *registerNode(Node *newNode, UErrorCode &errorCode);
    /**
     * Makes sure that there is only one unique FinalValueNode registered
     * with this value.
     * Avoids creating a node if the value is a duplicate.
     * @param value A final value.
     * @param errorCode ICU in/out UErrorCode.
                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
     * @return A FinalValueNode with the given value.
     * @internal
     */
    Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
#endif  /* U_HIDE_INTERNAL_API */

    /*
     * C++ note:
     * registerNode() and registerFinalValue() take ownership of their input nodes,
     * and only return owned nodes.
     * If they see a failure UErrorCode, they will delete the input node.
     * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
     * If there is a failure, they return NULL.
     *
     * NULL Node pointers can be safely passed into other Nodes because
     * they call the static Node::hashCode() which checks for a NULL pointer first.
     *
     * Therefore, as long as builder functions register a new node,
     * they need to check for failures only before explicitly dereferencing
     * a Node pointer, or before setting a new UErrorCode.
     */

    // Hash set of nodes, maps from nodes to integer 1.
    /** @internal */
    UHashtable *nodes;

    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
    // it is needed for layout of other objects.
    /**
     * @internal
     * \cond
     */
    class Node : public UObject {
    public:
        Node(int32_t initialHash) : hash(initialHash), offset(0) {}
        inline int32_t hashCode() const { return hash; }
        // Handles node==NULL.
        static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
        // Base class operator==() compares the actual class types.
        virtual UBool operator==(const Node &other) const;
        inline UBool operator!=(const Node &other) const { return !operator==(other); }
        /**
         * Traverses the Node graph and numbers branch edges, with rightmost edges first.
         * This is to avoid writing a duplicate node twice.
         *
         * Branch nodes in this trie data structure are not symmetric.
         * Most branch edges "jump" to other nodes but the rightmost branch edges
         * just continue without a jump.
         * Therefore, write() must write the rightmost branch edge last
         * (trie units are written backwards), and must write it at that point even if
         * it is a duplicate of a node previously written elsewhere.
         *
         * This function visits and marks right branch edges first.
         * Edges are numbered with increasingly negative values because we share the
         * offset field which gets positive values when nodes are written.
         * A branch edge also remembers the first number for any of its edges.
         *
         * When a further-left branch edge has a number in the range of the rightmost
         * edge's numbers, then it will be written as part of the required right edge
         * and we can avoid writing it first.
         *
         * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
         * edge numbers.
         *
         * @param edgeNumber The first edge number for this node and its sub-nodes.
         * @return An edge number that is at least the maximum-negative
         *         of the input edge number and the numbers of this node and all of its sub-nodes.
         */
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
        // write() must set the offset to a positive value.
        virtual void write(StringTrieBuilder &builder) = 0;
        // See markRightEdgesFirst.
        inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
                                               StringTrieBuilder &builder) {
            // Note: Edge numbers are negative, lastRight<=firstRight.
            // If offset>0 then this node and its sub-nodes have been written already
            // and we need not write them again.
            // If this node is part of the unwritten right branch edge,
            // then we wait until that is written.
            if(offset<0 && (offset<lastRight || firstRight<offset)) {
                write(builder);
            }
        }
        inline int32_t getOffset() const { return offset; }
    protected:
        int32_t hash;
        int32_t offset;
    };

#ifndef U_HIDE_INTERNAL_API
    // This class should not be overridden because
    // registerFinalValue() compares a stack-allocated FinalValueNode
    // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
    // with the input node, and the
    // !Node::operator==(other) used inside FinalValueNode::operator==(other)
    // will be false if the typeid's are different.
    /** @internal */
    class FinalValueNode : public Node {
    public:
        FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
        virtual UBool operator==(const Node &other) const;
        virtual void write(StringTrieBuilder &builder);
    protected:
        int32_t value;
    };
#endif  /* U_HIDE_INTERNAL_API */

    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
    // it is needed for layout of other objects.
    /**
     * @internal 
     */
    class ValueNode : public Node {
    public:
        ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
        virtual UBool operator==(const Node &other) const;
        void setValue(int32_t v) {
            hasValue=TRUE;
            value=v;
            hash=hash*37u+v;
        }
    protected:
        UBool hasValue;
        int32_t value;
    };

#ifndef U_HIDE_INTERNAL_API
    /** 
     * @internal 
     */
    class IntermediateValueNode : public ValueNode {
    public:
        IntermediateValueNode(int32_t v, Node *nextNode)
                : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
        virtual UBool operator==(const Node &other) const;
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
        virtual void write(StringTrieBuilder &builder);
    protected:
        Node *next;
    };
#endif  /* U_HIDE_INTERNAL_API */

    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
    // it is needed for layout of other objects.
    /**
     * @internal 
     */
    class LinearMatchNode : public ValueNode {
    public:
        LinearMatchNode(int32_t len, Node *nextNode)
                : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
                  length(len), next(nextNode) {}
        virtual UBool operator==(const Node &other) const;
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
    protected:
        int32_t length;
        Node *next;
    };

#ifndef U_HIDE_INTERNAL_API
    /**
     * @internal 
     */
    class BranchNode : public Node {
    public:
        BranchNode(int32_t initialHash) : Node(initialHash) {}
    protected:
        int32_t firstEdgeNumber;
    };

    /**
     * @internal 
     */
    class ListBranchNode : public BranchNode {
    public:
        ListBranchNode() : BranchNode(0x444444), length(0) {}
        virtual UBool operator==(const Node &other) const;
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
        virtual void write(StringTrieBuilder &builder);
        // Adds a unit with a final value.
        void add(int32_t c, int32_t value) {
            units[length]=(char16_t)c;
            equal[length]=NULL;
            values[length]=value;
            ++length;
            hash=(hash*37u+c)*37u+value;
        }
        // Adds a unit which leads to another match node.
        void add(int32_t c, Node *node) {
            units[length]=(char16_t)c;
            equal[length]=node;
            values[length]=0;
            ++length;
            hash=(hash*37u+c)*37u+hashCode(node);
        }
    protected:
        Node *equal[kMaxBranchLinearSubNodeLength];  // NULL means "has final value".
        int32_t length;
        int32_t values[kMaxBranchLinearSubNodeLength];
        char16_t units[kMaxBranchLinearSubNodeLength];
    };

    /**
     * @internal 
     */
    class SplitBranchNode : public BranchNode {
    public:
        SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
                : BranchNode(((0x555555u*37u+middleUnit)*37u+
                              hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
                  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
        virtual UBool operator==(const Node &other) const;
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
        virtual void write(StringTrieBuilder &builder);
    protected:
        char16_t unit;
        Node *lessThan;
        Node *greaterOrEqual;
    };

    // Branch head node, for writing the actual node lead unit.
    /** @internal */
    class BranchHeadNode : public ValueNode {
    public:
        BranchHeadNode(int32_t len, Node *subNode)
                : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
                  length(len), next(subNode) {}
        virtual UBool operator==(const Node &other) const;
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
        virtual void write(StringTrieBuilder &builder);
    protected:
        int32_t length;
        Node *next;  // A branch sub-node.
    };

#endif  /* U_HIDE_INTERNAL_API */
    /// \endcond

    /** @internal */
    virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
                                        Node *nextNode) const = 0;

    /** @internal */
    virtual int32_t write(int32_t unit) = 0;
    /** @internal */
    virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
    /** @internal */
    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
    /** @internal */
    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
    /** @internal */
    virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */



U_NAMESPACE_BEGIN

class BytesTrieElement;
class CharString;
/**
 * Builder class for BytesTrie.
 *
 * This class is not intended for public subclassing.
 * @stable ICU 4.8
 */
class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
public:
    /**
     * Constructs an empty builder.
     * @param errorCode Standard ICU error code.
     * @stable ICU 4.8
     */
    BytesTrieBuilder(UErrorCode &errorCode);

    /**
     * Destructor.
     * @stable ICU 4.8
     */
    virtual ~BytesTrieBuilder();

    /**
     * Adds a (byte sequence, value) pair.
     * The byte sequence must be unique.
     * The bytes will be copied; the builder does not keep
     * a reference to the input StringPiece or its data().
     * @param s The input byte sequence.
     * @param value The value associated with this byte sequence.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return *this
     * @stable ICU 4.8
     */
    BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode);

    /**
     * Builds a BytesTrie for the add()ed data.
     * Once built, no further data can be add()ed until clear() is called.
     *
     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
     * must have been add()ed.
     *
     * This method passes ownership of the builder's internal result array to the new trie object.
     * Another call to any build() variant will re-serialize the trie.
     * After clear() has been called, a new array will be used as well.
     * @param buildOption Build option, see UStringTrieBuildOption.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return A new BytesTrie for the add()ed data.
     * @stable ICU 4.8
     */
    BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);

    /**
     * Builds a BytesTrie for the add()ed data and byte-serializes it.
     * Once built, no further data can be add()ed until clear() is called.
     *
     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
     * must have been add()ed.
     *
     * Multiple calls to buildStringPiece() return StringPieces referring to the
     * builder's same byte array, without rebuilding.
     * If buildStringPiece() is called after build(), the trie will be
     * re-serialized into a new array.
     * If build() is called after buildStringPiece(), the trie object will become
     * the owner of the previously returned array.
     * After clear() has been called, a new array will be used as well.
     * @param buildOption Build option, see UStringTrieBuildOption.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
     * @stable ICU 4.8
     */
    StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);

    /**
     * Removes all (byte sequence, value) pairs.
     * New data can then be add()ed and a new trie can be built.
     * @return *this
     * @stable ICU 4.8
     */
    BytesTrieBuilder &clear();

private:
    BytesTrieBuilder(const BytesTrieBuilder &other);  // no copy constructor
    BytesTrieBuilder &operator=(const BytesTrieBuilder &other);  // no assignment operator

    void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);

    virtual int32_t getElementStringLength(int32_t i) const;
    virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const;
    virtual int32_t getElementValue(int32_t i) const;

    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;

    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const;

    virtual UBool matchNodesCanHaveValues() const { return FALSE; }

    virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
    virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
    virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }

    /**
     * @internal (private)
     */
    class BTLinearMatchNode : public LinearMatchNode {
    public:
        BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
        virtual UBool operator==(const Node &other) const;
        virtual void write(StringTrieBuilder &builder);
    private:
        const char *s;
    };
    
    virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
                                        Node *nextNode) const;

    UBool ensureCapacity(int32_t length);
    virtual int32_t write(int32_t byte);
    int32_t write(const char *b, int32_t length);
    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
    virtual int32_t writeDeltaTo(int32_t jumpTarget);

    CharString *strings;  // Pointer not object so we need not #include internal charstr.h.
    BytesTrieElement *elements;
    int32_t elementsCapacity;
    int32_t elementsLength;

    // Byte serialization of the trie.
    // Grows from the back: bytesLength measures from the end of the buffer!
    char *bytes;
    int32_t bytesCapacity;
    int32_t bytesLength;
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */





// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*   Copyright (C) 1997-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
******************************************************************************
*   Date        Name        Description
*   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
*   07/06/01    aliu        Modified to support int32_t keys on
*                           platforms with sizeof(void*) < 32.
******************************************************************************
*/




// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  uelement.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2011jul04
*   created by: Markus W. Scherer
*
*   Common definitions for UHashTable and UVector.
*   UHashTok moved here from uhash.h and renamed UElement.
*   This allows users of UVector to avoid the confusing #include of uhash.h.
*   uhash.h aliases UElement to UHashTok,
*   so that we need not change all of its code and its users.
*/




U_CDECL_BEGIN

/**
 * A UVector element, or a key or value within a UHashtable.
 * It may be either a 32-bit integral value or an opaque void* pointer.
 * The void* pointer may be smaller than 32 bits (e.g. 24 bits)
 * or may be larger (e.g. 64 bits).
 *
 * Because a UElement is the size of a native pointer or a 32-bit
 * integer, we pass it around by value.
 */
union UElement {
    void*   pointer;
    int32_t integer;
};
typedef union UElement UElement;

/**
 * An element-equality (boolean) comparison function.
 * @param e1 An element (object or integer)
 * @param e2 An element (object or integer)
 * @return TRUE if the two elements are equal.
 */
typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2);

/**
 * An element sorting (three-way) comparison function.
 * @param e1 An element (object or integer)
 * @param e2 An element (object or integer)
 * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
 */
typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);

/**
 * An element assignment function.  It may copy an integer, copy
 * a pointer, or clone a pointer, as appropriate.
 * @param dst The element to be assigned to
 * @param src The element to assign from
 */
typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src);

U_CDECL_END

/**
 * Comparator function for UnicodeString* keys. Implements UElementsAreEqual.
 * @param key1 The string for comparison
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
U_CAPI UBool U_EXPORT2 
uhash_compareUnicodeString(const UElement key1, const UElement key2);

/**
 * Comparator function for UnicodeString* keys (case insensitive).
 * Make sure to use together with uhash_hashCaselessUnicodeString.
 * Implements UElementsAreEqual.
 * @param key1 The string for comparison
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
U_CAPI UBool U_EXPORT2 
uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2);




/**
 * UHashtable stores key-value pairs and does moderately fast lookup
 * based on keys.  It provides a good tradeoff between access time and
 * storage space.  As elements are added to it, it grows to accomodate
 * them.  By default, the table never shrinks, even if all elements
 * are removed from it.
 *
 * Keys and values are stored as void* pointers.  These void* pointers
 * may be actual pointers to strings, objects, or any other structure
 * in memory, or they may simply be integral values cast to void*.
 * UHashtable doesn't care and manipulates them via user-supplied
 * functions.  These functions hash keys, compare keys, delete keys,
 * and delete values.  Some function pointers are optional (may be
 * NULL); others must be supplied.  Several prebuilt functions exist
 * to handle common key types.
 *
 * UHashtable ownership of keys and values is flexible, and controlled
 * by whether or not the key deleter and value deleter functions are
 * set.  If a void* key is actually a pointer to a deletable object,
 * then UHashtable can be made to delete that object by setting the
 * key deleter function pointer to a non-NULL value.  If this is done,
 * then keys passed to uhash_put() are owned by the hashtable and will
 * be deleted by it at some point, either as keys are replaced, or
 * when uhash_close() is finally called.  The same is true of values
 * and the value deleter function pointer.  Keys passed to methods
 * other than uhash_put() are never owned by the hashtable.
 *
 * NULL values are not allowed.  uhash_get() returns NULL to indicate
 * a key that is not in the table, and having a NULL value in the
 * table would generate an ambiguous result.  If a key and a NULL
 * value is passed to uhash_put(), this has the effect of doing a
 * uhash_remove() on that key.  This keeps uhash_get(), uhash_count(),
 * and uhash_nextElement() consistent with one another.
 *
 * To see everything in a hashtable, use uhash_nextElement() to
 * iterate through its contents.  Each call to this function returns a
 * UHashElement pointer.  A hash element contains a key, value, and
 * hashcode.  During iteration an element may be deleted by calling
 * uhash_removeElement(); iteration may safely continue thereafter.
 * The uhash_remove() function may also be safely called in
 * mid-iteration.  If uhash_put() is called during iteration,
 * the iteration is still guaranteed to terminate reasonably, but
 * there is no guarantee that every element will be returned or that
 * some won't be returned more than once.
 *
 * Under no circumstances should the UHashElement returned by
 * uhash_nextElement be modified directly.
 *
 * By default, the hashtable grows when necessary, but never shrinks,
 * even if all items are removed.  For most applications this is
 * optimal.  However, in a highly dynamic usage where memory is at a
 * premium, the table can be set to both grow and shrink by calling
 * uhash_setResizePolicy() with the policy U_GROW_AND_SHRINK.  In a
 * situation where memory is critical and the client wants a table
 * that does not grow at all, the constant U_FIXED can be used.
 */

/********************************************************************
 * Data Structures
 ********************************************************************/

U_CDECL_BEGIN

/**
 * A key or value within a UHashtable.
 * The hashing and comparison functions take a pointer to a
 * UHashTok, but the deleter receives the void* pointer within it.
 */
typedef UElement UHashTok;

/**
 * This is a single hash element.
 */
struct UHashElement {
    /* Reorder these elements to pack nicely if necessary */
    int32_t  hashcode;
    UHashTok value;
    UHashTok key;
};
typedef struct UHashElement UHashElement;

/**
 * A hashing function.
 * @param key A key stored in a hashtable
 * @return A NON-NEGATIVE hash code for parm.
 */
typedef int32_t U_CALLCONV UHashFunction(const UHashTok key);

/**
 * A key equality (boolean) comparison function.
 */
typedef UElementsAreEqual UKeyComparator;

/**
 * A value equality (boolean) comparison function.
 */
typedef UElementsAreEqual UValueComparator;

/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */

/**
 * This specifies whether or not, and how, the hastable resizes itself.
 * See uhash_setResizePolicy().
 */
enum UHashResizePolicy {
    U_GROW,            /* Grow on demand, do not shrink */
    U_GROW_AND_SHRINK, /* Grow and shrink on demand */
    U_FIXED            /* Never change size */
};

/**
 * The UHashtable struct.  Clients should treat this as an opaque data
 * type and manipulate it only through the uhash_... API.
 */
struct UHashtable {

    /* Main key-value pair storage array */

    UHashElement *elements;

    /* Function pointers */

    UHashFunction *keyHasher;      /* Computes hash from key.
                                   * Never null. */
    UKeyComparator *keyComparator; /* Compares keys for equality.
                                   * Never null. */
    UValueComparator *valueComparator; /* Compares the values for equality */

    UObjectDeleter *keyDeleter;    /* Deletes keys when required.
                                   * If NULL won't do anything */
    UObjectDeleter *valueDeleter;  /* Deletes values when required.
                                   * If NULL won't do anything */

    /* Size parameters */

    int32_t     count;      /* The number of key-value pairs in this table.
                             * 0 <= count <= length.  In practice we
                             * never let count == length (see code). */
    int32_t     length;     /* The physical size of the arrays hashes, keys
                             * and values.  Must be prime. */

    /* Rehashing thresholds */

    int32_t     highWaterMark;  /* If count > highWaterMark, rehash */
    int32_t     lowWaterMark;   /* If count < lowWaterMark, rehash */
    float       highWaterRatio; /* 0..1; high water as a fraction of length */
    float       lowWaterRatio;  /* 0..1; low water as a fraction of length */

    int8_t      primeIndex;     /* Index into our prime table for length.
                                 * length == PRIMES[primeIndex] */
    UBool       allocated; /* Was this UHashtable allocated? */
};
typedef struct UHashtable UHashtable;

U_CDECL_END

/********************************************************************
 * API
 ********************************************************************/

/**
 * Initialize a new UHashtable.
 * @param keyHash A pointer to the key hashing function.  Must not be
 * NULL.
 * @param keyComp A pointer to the function that compares keys.  Must
 * not be NULL.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_openSize
 */
U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
           UErrorCode *status);

/**
 * Initialize a new UHashtable with a given initial size.
 * @param keyHash A pointer to the key hashing function.  Must not be
 * NULL.
 * @param keyComp A pointer to the function that compares keys.  Must
 * not be NULL.
 * @param size The initial capacity of this hash table.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_open
 */
U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
               UErrorCode *status);

/**
 * Initialize an existing UHashtable.
 * @param keyHash A pointer to the key hashing function.  Must not be
 * NULL.
 * @param keyComp A pointer to the function that compares keys.  Must
 * not be NULL.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_openSize
 */
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *hash,
           UHashFunction *keyHash,
           UKeyComparator *keyComp,
           UValueComparator *valueComp,
           UErrorCode *status);

/**
 * Initialize an existing UHashtable.
 * @param keyHash A pointer to the key hashing function.  Must not be
 * NULL.
 * @param keyComp A pointer to the function that compares keys.  Must
 * not be NULL.
 * @param size The initial capacity of this hash table.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return A pointer to a UHashtable, or 0 if an error occurred.
 * @see uhash_openSize
 */
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *hash,
               UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
               UErrorCode *status);

/**
 * Close a UHashtable, releasing the memory used.
 * @param hash The UHashtable to close. If hash is NULL no operation is performed.
 */
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash);



/**
 * Set the function used to hash keys.
 * @param hash The UHashtable to set
 * @param fn the function to be used hash keys; must not be NULL
 * @return the previous key hasher; non-NULL
 */
U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);

/**
 * Set the function used to compare keys.  The default comparison is a
 * void* pointer comparison.
 * @param hash The UHashtable to set
 * @param fn the function to be used compare keys; must not be NULL
 * @return the previous key comparator; non-NULL
 */
U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);

/**
 * Set the function used to compare values.  The default comparison is a
 * void* pointer comparison.
 * @param hash The UHashtable to set
 * @param fn the function to be used compare keys; must not be NULL
 * @return the previous key comparator; non-NULL
 */
U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);

/**
 * Set the function used to delete keys.  If this function pointer is
 * NULL, this hashtable does not delete keys.  If it is non-NULL, this
 * hashtable does delete keys.  This function should be set once
 * before any elements are added to the hashtable and should not be
 * changed thereafter.
 * @param hash The UHashtable to set
 * @param fn the function to be used delete keys, or NULL
 * @return the previous key deleter; may be NULL
 */
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);

/**
 * Set the function used to delete values.  If this function pointer
 * is NULL, this hashtable does not delete values.  If it is non-NULL,
 * this hashtable does delete values.  This function should be set
 * once before any elements are added to the hashtable and should not
 * be changed thereafter.
 * @param hash The UHashtable to set
 * @param fn the function to be used delete values, or NULL
 * @return the previous value deleter; may be NULL
 */
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);

/**
 * Specify whether or not, and how, the hastable resizes itself.
 * By default, tables grow but do not shrink (policy U_GROW).
 * See enum UHashResizePolicy.
 * @param hash The UHashtable to set
 * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
 */
U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);

/**
 * Get the number of key-value pairs stored in a UHashtable.
 * @param hash The UHashtable to query.
 * @return The number of key-value pairs stored in hash.
 */
U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable *hash);

/**
 * Put a (key=pointer, value=pointer) item in a UHashtable.  If the
 * keyDeleter is non-NULL, then the hashtable owns 'key' after this
 * call.  If the valueDeleter is non-NULL, then the hashtable owns
 * 'value' after this call.  Storing a NULL value is the same as
 * calling uhash_remove().
 * @param hash The target UHashtable.
 * @param key The key to store.
 * @param value The value to store, may be NULL (see above).
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The previous value, or NULL if none.
 * @see uhash_get
 */
U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
          void *key,
          void *value,
          UErrorCode *status);

/**
 * Put a (key=integer, value=pointer) item in a UHashtable.
 * keyDeleter must be NULL.  If the valueDeleter is non-NULL, then the
 * hashtable owns 'value' after this call.  Storing a NULL value is
 * the same as calling uhash_remove().
 * @param hash The target UHashtable.
 * @param key The integer key to store.
 * @param value The value to store, may be NULL (see above).
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The previous value, or NULL if none.
 * @see uhash_get
 */
U_CAPI void* U_EXPORT2
uhash_iput(UHashtable *hash,
           int32_t key,
           void* value,
           UErrorCode *status);

/**
 * Put a (key=pointer, value=integer) item in a UHashtable.  If the
 * keyDeleter is non-NULL, then the hashtable owns 'key' after this
 * call.  valueDeleter must be NULL.  Storing a 0 value is the same as
 * calling uhash_remove().
 * @param hash The target UHashtable.
 * @param key The key to store.
 * @param value The integer value to store.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The previous value, or 0 if none.
 * @see uhash_get
 */
U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable *hash,
           void* key,
           int32_t value,
           UErrorCode *status);

/**
 * Put a (key=integer, value=integer) item in a UHashtable.  If the
 * keyDeleter is non-NULL, then the hashtable owns 'key' after this
 * call.  valueDeleter must be NULL.  Storing a 0 value is the same as
 * calling uhash_remove().
 * @param hash The target UHashtable.
 * @param key The key to store.
 * @param value The integer value to store.
 * @param status A pointer to an UErrorCode to receive any errors.
 * @return The previous value, or 0 if none.
 * @see uhash_get
 */
U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable *hash,
           int32_t key,
           int32_t value,
           UErrorCode *status);

/**
 * Retrieve a pointer value from a UHashtable using a pointer key,
 * as previously stored by uhash_put().
 * @param hash The target UHashtable.
 * @param key A pointer key stored in a hashtable
 * @return The requested item, or NULL if not found.
 */
U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash,
          const void *key);

/**
 * Retrieve a pointer value from a UHashtable using a integer key,
 * as previously stored by uhash_iput().
 * @param hash The target UHashtable.
 * @param key An integer key stored in a hashtable
 * @return The requested item, or NULL if not found.
 */
U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable *hash,
           int32_t key);

/**
 * Retrieve an integer value from a UHashtable using a pointer key,
 * as previously stored by uhash_puti().
 * @param hash The target UHashtable.
 * @param key A pointer key stored in a hashtable
 * @return The requested item, or 0 if not found.
 */
U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable *hash,
           const void* key);
/**
 * Retrieve an integer value from a UHashtable using an integer key,
 * as previously stored by uhash_iputi().
 * @param hash The target UHashtable.
 * @param key An integer key stored in a hashtable
 * @return The requested item, or 0 if not found.
 */
U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash,
           int32_t key);

/**
 * Remove an item from a UHashtable stored by uhash_put().
 * @param hash The target UHashtable.
 * @param key A key stored in a hashtable
 * @return The item removed, or NULL if not found.
 */
U_CAPI void* U_EXPORT2
uhash_remove(UHashtable *hash,
             const void *key);

/**
 * Remove an item from a UHashtable stored by uhash_iput().
 * @param hash The target UHashtable.
 * @param key An integer key stored in a hashtable
 * @return The item removed, or NULL if not found.
 */
U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable *hash,
              int32_t key);

/**
 * Remove an item from a UHashtable stored by uhash_puti().
 * @param hash The target UHashtable.
 * @param key An key stored in a hashtable
 * @return The item removed, or 0 if not found.
 */
U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable *hash,
              const void* key);

/**
 * Remove an item from a UHashtable stored by uhash_iputi().
 * @param hash The target UHashtable.
 * @param key An integer key stored in a hashtable
 * @return The item removed, or 0 if not found.
 */
U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable *hash,
               int32_t key);

/**
 * Remove all items from a UHashtable.
 * @param hash The target UHashtable.
 */
U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);

/**
 * Locate an element of a UHashtable.  The caller must not modify the
 * returned object.  The primary use of this function is to obtain the
 * stored key when it may not be identical to the search key.  For
 * example, if the compare function is a case-insensitive string
 * compare, then the hash key may be desired in order to obtain the
 * canonical case corresponding to a search key.
 * @param hash The target UHashtable.
 * @param key A key stored in a hashtable
 * @return a hash element, or NULL if the key is not found.
 */
U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key);

/**
 * \def UHASH_FIRST
 * Constant for use with uhash_nextElement
 * @see uhash_nextElement
 */
#define UHASH_FIRST (-1)

/**
 * Iterate through the elements of a UHashtable.  The caller must not
 * modify the returned object.  However, uhash_removeElement() may be
 * called during iteration to remove an element from the table.
 * Iteration may safely be resumed afterwards.  If uhash_put() is
 * called during iteration the iteration will then be out of sync and
 * should be restarted.
 * @param hash The target UHashtable.
 * @param pos This should be set to UHASH_FIRST initially, and left untouched
 * thereafter.
 * @return a hash element, or NULL if no further key-value pairs
 * exist in the table.
 */
U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable *hash,
                  int32_t *pos);

/**
 * Remove an element, returned by uhash_nextElement(), from the table.
 * Iteration may be safely continued afterwards.
 * @param hash The hashtable
 * @param e The element, returned by uhash_nextElement(), to remove.
 * Must not be NULL.  Must not be an empty or deleted element (as long
 * as this was returned by uhash_nextElement() it will not be empty or
 * deleted).  Note: Although this parameter is const, it will be
 * modified.
 * @return the value that was removed.
 */
U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable *hash, const UHashElement* e);

/********************************************************************
 * UHashTok convenience
 ********************************************************************/

/**
 * Return a UHashTok for an integer.
 * @param i The given integer
 * @return a UHashTok for an integer.
 */
/*U_CAPI UHashTok U_EXPORT2
uhash_toki(int32_t i);*/

/**
 * Return a UHashTok for a pointer.
 * @param p The given pointer
 * @return a UHashTok for a pointer.
 */
/*U_CAPI UHashTok U_EXPORT2
uhash_tokp(void* p);*/

/********************************************************************
 * UChar* and char* Support Functions
 ********************************************************************/

/**
 * Generate a hash code for a null-terminated UChar* string.  If the
 * string is not null-terminated do not use this function.  Use
 * together with uhash_compareUChars.
 * @param key The string (const UChar*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key);

/**
 * Generate a hash code for a null-terminated char* string.  If the
 * string is not null-terminated do not use this function.  Use
 * together with uhash_compareChars.
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);

/**
 * Generate a case-insensitive hash code for a null-terminated char*
 * string.  If the string is not null-terminated do not use this
 * function.  Use together with uhash_compareIChars.
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashIChars(const UHashTok key);

/**
 * Comparator for null-terminated UChar* strings.  Use together with
 * uhash_hashUChars.
 * @param key1 The string for comparison
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1, const UHashTok key2);

/**
 * Comparator for null-terminated char* strings.  Use together with
 * uhash_hashChars.
 * @param key1 The string for comparison
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1, const UHashTok key2);

/**
 * Case-insensitive comparator for null-terminated char* strings.  Use
 * together with uhash_hashIChars.
 * @param key1 The string for comparison
 * @param key2 The string for comparison
 * @return true if key1 and key2 are equal, return false otherwise.
 */
U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1, const UHashTok key2);

/********************************************************************
 * UnicodeString Support Functions
 ********************************************************************/

/**
 * Hash function for UnicodeString* keys.
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key);

/**
 * Hash function for UnicodeString* keys (case insensitive).
 * Make sure to use together with uhash_compareCaselessUnicodeString.
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key);

/********************************************************************
 * int32_t Support Functions
 ********************************************************************/

/**
 * Hash function for 32-bit integer keys.
 * @param key The string (const char*) to hash.
 * @return A hash code for the key.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key);

/**
 * Comparator function for 32-bit integer keys.
 * @param key1 The integer for comparison
 * @param Key2 The integer for comparison
 * @return true if key1 and key2 are equal, return false otherwise
 */
U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2);

/********************************************************************
 * Other Support Functions
 ********************************************************************/

/**
 * Deleter for Hashtable objects.
 * @param obj The object to be deleted
 */
U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);

/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */

/**
 * Checks if the given hash tables are equal or not.
 * @param hash1
 * @param hash2
 * @return true if the hashtables are equal and false if not.
 */
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2);


#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUHashtablePointer
 * "Smart pointer" class, closes a UHashtable via uhash_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUHashtablePointer, UHashtable, uhash_close);

U_NAMESPACE_END

#endif


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uarrsort.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003aug04
*   created by: Markus W. Scherer
*
*   Internal function for sorting arrays.
*/




U_CDECL_BEGIN
/**
 * Function type for comparing two items as part of sorting an array or similar.
 * Callback function for uprv_sortArray().
 *
 * @param context Application-specific pointer, passed through by uprv_sortArray().
 * @param left    Pointer to the "left" item.
 * @param right   Pointer to the "right" item.
 * @return 32-bit signed integer comparison result:
 *                <0 if left<right
 *               ==0 if left==right
 *                >0 if left>right
 *
 * @internal
 */
typedef int32_t U_CALLCONV
UComparator(const void *context, const void *left, const void *right);
U_CDECL_END

/**
 * Array sorting function.
 * Uses a UComparator for comparing array items to each other, and simple
 * memory copying to move items.
 *
 * @param array      The array to be sorted.
 * @param length     The number of items in the array.
 * @param itemSize   The size in bytes of each array item.
 * @param cmp        UComparator function used to compare two items each.
 * @param context    Application-specific pointer, passed through to the UComparator.
 * @param sortStable If true, a stable sorting algorithm must be used.
 * @param pErrorCode ICU in/out UErrorCode parameter.
 *
 * @internal
 */
U_CAPI void U_EXPORT2
uprv_sortArray(void *array, int32_t length, int32_t itemSize,
               UComparator *cmp, const void *context,
               UBool sortStable, UErrorCode *pErrorCode);

/**
 * Convenience UComparator implementation for uint16_t arrays.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_uint16Comparator(const void *context, const void *left, const void *right);

/**
 * Convenience UComparator implementation for int32_t arrays.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_int32Comparator(const void *context, const void *left, const void *right);

/**
 * Convenience UComparator implementation for uint32_t arrays.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_uint32Comparator(const void *context, const void *left, const void *right);

/**
 * Much like Java Collections.binarySearch(list, key, comparator).
 *
 * Except: Java documents "If the list contains multiple elements equal to
 * the specified object, there is no guarantee which one will be found."
 *
 * This version here will return the largest index of any equal item,
 * for use in stable sorting.
 *
 * @return the index>=0 where the item was found:
 *         the largest such index, if multiple, for stable sorting;
 *         or the index<0 for inserting the item at ~index in sorted order
 */
U_CAPI int32_t U_EXPORT2
uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize,
                        UComparator *cmp, const void *context);



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*  
**********************************************************************
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ustr_imp.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001jan30
*   created by: Markus W. Scherer
*/





/**
 * Internal option for unorm_cmpEquivFold() for strncmp style.
 * If set, checks for both string length and terminating NUL.
 */
#define _STRNCMP_STYLE 0x1000

/**
 * Compare two strings in code point order or code unit order.
 * Works in strcmp style (both lengths -1),
 * strncmp style (lengths equal and >=0, flag TRUE),
 * and memcmp/UnicodeString style (at least one length >=0).
 */
U_CFUNC int32_t U_EXPORT2
uprv_strCompare(const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder);

U_CAPI int32_t U_EXPORT2 
ustr_hashUCharsN(const UChar *str, int32_t length);

U_CAPI int32_t U_EXPORT2 
ustr_hashCharsN(const char *str, int32_t length);

U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length);

/**
 * NUL-terminate a UChar * string if possible.
 * If length  < destCapacity then NUL-terminate.
 * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.
 * If length  > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.
 *
 * @param dest Destination buffer, can be NULL if destCapacity==0.
 * @param destCapacity Number of UChars available at dest.
 * @param length Number of UChars that were (to be) written to dest.
 * @param pErrorCode ICU error code.
 * @return length
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);

/**
 * NUL-terminate a char * string if possible.
 * Same as u_terminateUChars() but for a different string type.
 */
U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);

/**
 * NUL-terminate a UChar32 * string if possible.
 * Same as u_terminateUChars() but for a different string type.
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);

/**
 * NUL-terminate a wchar_t * string if possible.
 * Same as u_terminateUChars() but for a different string type.
 */
U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);

/**
 * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
 * Returns 1 for ASCII 0..0x7f.
 * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @return 0..4
 */
#define U8_COUNT_BYTES(leadByte) \
    (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte))

/**
 * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.
 * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @return 0 or 2..4
 */
#define U8_COUNT_BYTES_NON_ASCII(leadByte) \
    (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0)

#ifdef __cplusplus

U_NAMESPACE_BEGIN

class UTF8 {
public:
    UTF8() = delete;  // all static

    /**
     * Is t a valid UTF-8 trail byte?
     *
     * @param prev Must be the preceding lead byte if i==1 and length>=3;
     *             otherwise ignored.
     * @param t The i-th byte following the lead byte.
     * @param i The index (1..3) of byte t in the byte sequence. 0<i<length
     * @param length The length (2..4) of the byte sequence according to the lead byte.
     * @return TRUE if t is a valid trail byte in this context.
     */
    static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) {
        // The first trail byte after a 3- or 4-byte lead byte
        // needs to be validated together with its lead byte.
        if (length <= 2 || i > 1) {
            return U8_IS_TRAIL(t);
        } else if (length == 3) {
            return U8_IS_VALID_LEAD3_AND_T1(prev, t);
        } else {  // length == 4
            return U8_IS_VALID_LEAD4_AND_T1(prev, t);
        }
    }
};

U_NAMESPACE_END

#endif  // __cplusplus



U_NAMESPACE_BEGIN

/*
 * Note: This builder implementation stores (bytes, value) pairs with full copies
 * of the byte sequences, until the BytesTrie is built.
 * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
 */

class BytesTrieElement : public UMemory {
public:
    // Use compiler's default constructor, initializes nothing.

    void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode);

    StringPiece getString(const CharString &strings) const {
        int32_t offset=stringOffset;
        int32_t length;
        if(offset>=0) {
            length=(uint8_t)strings[offset++];
        } else {
            offset=~offset;
            length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
            offset+=2;
        }
        return StringPiece(strings.data()+offset, length);
    }
    int32_t getStringLength(const CharString &strings) const {
        int32_t offset=stringOffset;
        if(offset>=0) {
            return (uint8_t)strings[offset];
        } else {
            offset=~offset;
            return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
        }
    }

    char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }

    int32_t getValue() const { return value; }

    int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;

private:
    const char *data(const CharString &strings) const {
        int32_t offset=stringOffset;
        if(offset>=0) {
            ++offset;
        } else {
            offset=~offset+2;
        }
        return strings.data()+offset;
    }

    // If the stringOffset is non-negative, then the first strings byte contains
    // the string length.
    // If the stringOffset is negative, then the first two strings bytes contain
    // the string length (big-endian), and the offset needs to be bit-inverted.
    // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
    int32_t stringOffset;
    int32_t value;
};

void
BytesTrieElement::setTo(StringPiece s, int32_t val,
                        CharString &strings, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    int32_t length=s.length();
    if(length>0xffff) {
        // Too long: We store the length in 1 or 2 bytes.
        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }
    int32_t offset=strings.length();
    if(length>0xff) {
        offset=~offset;
        strings.append((char)(length>>8), errorCode);
    }
    strings.append((char)length, errorCode);
    stringOffset=offset;
    value=val;
    strings.append(s, errorCode);
}

int32_t
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
    // TODO: add StringPiece::compare(), see ticket #8187
    StringPiece thisString=getString(strings);
    StringPiece otherString=other.getString(strings);
    int32_t lengthDiff=thisString.length()-otherString.length();
    int32_t commonLength;
    if(lengthDiff<=0) {
        commonLength=thisString.length();
    } else {
        commonLength=otherString.length();
    }
    int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength);
    return diff!=0 ? diff : lengthDiff;
}

BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
          bytes(NULL), bytesCapacity(0), bytesLength(0) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    strings=new CharString();
    if(strings==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}

BytesTrieBuilder::~BytesTrieBuilder() {
    delete strings;
    delete[] elements;
    uprv_free(bytes);
}

BytesTrieBuilder &
BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(bytesLength>0) {
        // Cannot add elements after building.
        errorCode=U_NO_WRITE_PERMISSION;
        return *this;
    }
    if(elementsLength==elementsCapacity) {
        int32_t newCapacity;
        if(elementsCapacity==0) {
            newCapacity=1024;
        } else {
            newCapacity=4*elementsCapacity;
        }
        BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
        if(newElements==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return *this; // error instead of dereferencing null
        }
        if(elementsLength>0) {
            uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement));
        }
        delete[] elements;
        elements=newElements;
        elementsCapacity=newCapacity;
    }
    elements[elementsLength++].setTo(s, value, *strings, errorCode);
    return *this;
}

U_CDECL_BEGIN

static int32_t U_CALLCONV
bytestriebuilder_compareElementStrings(const void *context, const void *left, const void *right) {
    const CharString *strings=static_cast<const CharString *>(context);
    const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
    const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
    return leftElement->compareStringTo(*rightElement, *strings);
}

U_CDECL_END

BytesTrie *
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    BytesTrie *newTrie=NULL;
    if(U_SUCCESS(errorCode)) {
        newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
        if(newTrie==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        } else {
            bytes=NULL;  // The new trie now owns the array.
            bytesCapacity=0;
        }
    }
    return newTrie;
}

StringPiece
BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    StringPiece result;
    if(U_SUCCESS(errorCode)) {
        result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
    }
    return result;
}

void
BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(bytes!=NULL && bytesLength>0) {
        // Already built.
        return;
    }
    if(bytesLength==0) {
        if(elementsLength==0) {
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
                      bytestriebuilder_compareElementStrings, strings,
                      FALSE,  // need not be a stable sort
                      &errorCode);
        if(U_FAILURE(errorCode)) {
            return;
        }
        // Duplicate strings are not allowed.
        StringPiece prev=elements[0].getString(*strings);
        for(int32_t i=1; i<elementsLength; ++i) {
            StringPiece current=elements[i].getString(*strings);
            if(prev==current) {
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
            prev=current;
        }
    }
    // Create and byte-serialize the trie for the elements.
    bytesLength=0;
    int32_t capacity=strings->length();
    if(capacity<1024) {
        capacity=1024;
    }
    if(bytesCapacity<capacity) {
        uprv_free(bytes);
        bytes=static_cast<char *>(uprv_malloc(capacity));
        if(bytes==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            bytesCapacity=0;
            return;
        }
        bytesCapacity=capacity;
    }
    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
    if(bytes==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}

BytesTrieBuilder &
BytesTrieBuilder::clear() {
    strings->clear();
    elementsLength=0;
    bytesLength=0;
    return *this;
}

int32_t
BytesTrieBuilder::getElementStringLength(int32_t i) const {
    return elements[i].getStringLength(*strings);
}

UChar
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
    return (uint8_t)elements[i].charAt(byteIndex, *strings);
}

int32_t
BytesTrieBuilder::getElementValue(int32_t i) const {
    return elements[i].getValue();
}

int32_t
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
    const BytesTrieElement &firstElement=elements[first];
    const BytesTrieElement &lastElement=elements[last];
    int32_t minStringLength=firstElement.getStringLength(*strings);
    while(++byteIndex<minStringLength &&
            firstElement.charAt(byteIndex, *strings)==
            lastElement.charAt(byteIndex, *strings)) {}
    return byteIndex;
}

int32_t
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
    int32_t length=0;  // Number of different bytes at byteIndex.
    int32_t i=start;
    do {
        char byte=elements[i++].charAt(byteIndex, *strings);
        while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) {
            ++i;
        }
        ++length;
    } while(i<limit);
    return length;
}

int32_t
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
    do {
        char byte=elements[i++].charAt(byteIndex, *strings);
        while(byte==elements[i].charAt(byteIndex, *strings)) {
            ++i;
        }
    } while(--count>0);
    return i;
}

int32_t
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
    char b=(char)byte;
    while(b==elements[i].charAt(byteIndex, *strings)) {
        ++i;
    }
    return i;
}

BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
        : LinearMatchNode(len, nextNode), s(bytes) {
    hash=static_cast<int32_t>(
        static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
}

UBool
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
    if(this==&other) {
        return TRUE;
    }
    if(!LinearMatchNode::operator==(other)) {
        return FALSE;
    }
    const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
    return 0==uprv_memcmp(s, o.s, length);
}

void
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
    BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
    next->write(builder);
    b.write(s, length);
    offset=b.write(b.getMinLinearMatch()+length-1);
}

StringTrieBuilder::Node *
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
                                        Node *nextNode) const {
    return new BTLinearMatchNode(
            elements[i].getString(*strings).data()+byteIndex,
            length,
            nextNode);
}

UBool
BytesTrieBuilder::ensureCapacity(int32_t length) {
    if(bytes==NULL) {
        return FALSE;  // previous memory allocation had failed
    }
    if(length>bytesCapacity) {
        int32_t newCapacity=bytesCapacity;
        do {
            newCapacity*=2;
        } while(newCapacity<=length);
        char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
        if(newBytes==NULL) {
            // unable to allocate memory
            uprv_free(bytes);
            bytes=NULL;
            bytesCapacity=0;
            return FALSE;
        }
        uprv_memcpy(newBytes+(newCapacity-bytesLength),
                    bytes+(bytesCapacity-bytesLength), bytesLength);
        uprv_free(bytes);
        bytes=newBytes;
        bytesCapacity=newCapacity;
    }
    return TRUE;
}

int32_t
BytesTrieBuilder::write(int32_t byte) {
    int32_t newLength=bytesLength+1;
    if(ensureCapacity(newLength)) {
        bytesLength=newLength;
        bytes[bytesCapacity-bytesLength]=(char)byte;
    }
    return bytesLength;
}

int32_t
BytesTrieBuilder::write(const char *b, int32_t length) {
    int32_t newLength=bytesLength+length;
    if(ensureCapacity(newLength)) {
        bytesLength=newLength;
        uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
    }
    return bytesLength;
}

int32_t
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
    return write(elements[i].getString(*strings).data()+byteIndex, length);
}

int32_t
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
    if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
        return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
    }
    char intBytes[5];
    int32_t length=1;
    if(i<0 || i>0xffffff) {
        intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
        intBytes[1]=(char)((uint32_t)i>>24);
        intBytes[2]=(char)((uint32_t)i>>16);
        intBytes[3]=(char)((uint32_t)i>>8);
        intBytes[4]=(char)i;
        length=5;
    // } else if(i<=BytesTrie::kMaxOneByteValue) {
    //     intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
    } else {
        if(i<=BytesTrie::kMaxTwoByteValue) {
            intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
        } else {
            if(i<=BytesTrie::kMaxThreeByteValue) {
                intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
            } else {
                intBytes[0]=(char)BytesTrie::kFourByteValueLead;
                intBytes[1]=(char)(i>>16);
                length=2;
            }
            intBytes[length++]=(char)(i>>8);
        }
        intBytes[length++]=(char)i;
    }
    intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
    return write(intBytes, length);
}

int32_t
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
    int32_t offset=write(node);
    if(hasValue) {
        offset=writeValueAndFinal(value, FALSE);
    }
    return offset;
}

int32_t
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
    int32_t i=bytesLength-jumpTarget;
    U_ASSERT(i>=0);
    if(i<=BytesTrie::kMaxOneByteDelta) {
        return write(i);
    }
    char intBytes[5];
    int32_t length;
    if(i<=BytesTrie::kMaxTwoByteDelta) {
        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
        length=1;
    } else {
        if(i<=BytesTrie::kMaxThreeByteDelta) {
            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
            length=2;
        } else {
            if(i<=0xffffff) {
                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
                length=3;
            } else {
                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
                intBytes[1]=(char)(i>>24);
                length=4;
            }
            intBytes[1]=(char)(i>>16);
        }
        intBytes[1]=(char)(i>>8);
    }
    intBytes[length++]=(char)i;
    return write(intBytes, length);
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestrieiterator.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010nov03
*   created by: Markus W. Scherer
*/





// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

//
//  UVector32 is a class implementing a vector of 32 bit integers.
//            It is similar to UVector, but holds int32_t values rather than pointers.
//            Most of the code is unchanged from UVector.
//







U_NAMESPACE_BEGIN



/**
 * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector
 * that is (mostly) compatible with java.util.Vector.
 *
 * <p>This is a very simple implementation, written to satisfy an
 * immediate porting need.  As such, it is not completely fleshed out,
 * and it aims for simplicity and conformity.  Nonetheless, it serves
 * its purpose (porting code from java that uses java.util.Vector)
 * well, and it could be easily made into a more robust vector class.
 *
 * <p><b>Design notes</b>
 *
 * <p>There is index bounds checking, but little is done about it.  If
 * indices are out of bounds, either nothing happens, or zero is
 * returned.  We <em>do</em> avoid indexing off into the weeds.
 *
 * <p>There is detection of out of memory, but the handling is very
 * coarse-grained -- similar to UnicodeString's protocol, but even
 * coarser.  The class contains <em>one static flag</em> that is set
 * when any call to <tt>new</tt> returns zero.  This allows the caller
 * to use several vectors and make just one check at the end to see if
 * a memory failure occurred.  This is more efficient than making a
 * check after each call on each vector when doing many operations on
 * multiple vectors.  The single static flag works best when memory
 * failures are infrequent, and when recovery options are limited or
 * nonexistent.
 *
 * <p><b>To do</b>
 *
 * <p>Improve the handling of index out of bounds errors.
 *
 * @author Alan Liu
 */
class U_COMMON_API UVector32 : public UObject {
private:
    int32_t   count;

    int32_t   capacity;
    
    int32_t   maxCapacity;   // Limit beyond which capacity is not permitted to grow.

    int32_t*  elements;

public:
    UVector32(UErrorCode &status);

    UVector32(int32_t initialCapacity, UErrorCode &status);

    virtual ~UVector32();

    /**
     * Assign this object to another (make this a copy of 'other').
     * Use the 'assign' function to assign each element.
     */
    void assign(const UVector32& other, UErrorCode &ec);

    /**
     * Compare this vector with another.  They will be considered
     * equal if they are of the same size and all elements are equal,
     * as compared using this object's comparer.
     */
    UBool operator==(const UVector32& other);

    /**
     * Equivalent to !operator==()
     */
    inline UBool operator!=(const UVector32& other);

    //------------------------------------------------------------
    // java.util.Vector API
    //------------------------------------------------------------

    inline void addElement(int32_t elem, UErrorCode &status);

    void setElementAt(int32_t elem, int32_t index);

    void insertElementAt(int32_t elem, int32_t index, UErrorCode &status);
    
    inline int32_t elementAti(int32_t index) const;

    UBool equals(const UVector32 &other) const;

    inline int32_t lastElementi(void) const;

    int32_t indexOf(int32_t elem, int32_t startIndex = 0) const;

    inline UBool contains(int32_t elem) const;

    UBool containsAll(const UVector32& other) const;

    UBool removeAll(const UVector32& other);

    UBool retainAll(const UVector32& other);

    void removeElementAt(int32_t index);

    void removeAllElements();

    inline int32_t size(void) const;

    inline UBool isEmpty(void) const;

    // Inline.  Use this one for speedy size check.
    inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);

    // Out-of-line, handles actual growth.  Called by ensureCapacity() when necessary.
    UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);

    /**
     * Change the size of this vector as follows: If newSize is
     * smaller, then truncate the array, possibly deleting held
     * elements for i >= newSize.  If newSize is larger, grow the
     * array, filling in new slows with zero.
     */
    void setSize(int32_t newSize);

    //------------------------------------------------------------
    // New API
    //------------------------------------------------------------

    /**
     * Returns true if this vector contains none of the elements
     * of the given vector.
     * @param other vector to be checked for containment
     * @return true if the test condition is met
     */
    UBool containsNone(const UVector32& other) const;


    /**
     * Insert the given integer into this vector at its sorted position.
     * The current elements are assumed to be sorted already.
     */
    void sortedInsert(int32_t elem, UErrorCode& ec);

    /**
     * Returns a pointer to the internal array holding the vector.
     */
    inline int32_t *getBuffer() const;

    /**
     * Set the maximum allowed buffer capacity for this vector/stack.
     * Default with no limit set is unlimited, go until malloc() fails.
     * A Limit of zero means unlimited capacity.
     * Units are vector elements (32 bits each), not bytes.
     */
    void setMaxCapacity(int32_t limit);

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     */
    virtual UClassID getDynamicClassID() const;

private:
    void _init(int32_t initialCapacity, UErrorCode &status);

    // Disallow
    UVector32(const UVector32&);

    // Disallow
    UVector32& operator=(const UVector32&);


    //  API Functions for Stack operations.
    //  In the original UVector, these were in a separate derived class, UStack.
    //  Here in UVector32, they are all together.
public:
    inline UBool empty(void) const;   // TODO:  redundant, same as empty().  Remove it?

    inline int32_t peeki(void) const;
    
    inline int32_t popi(void);
    
    inline int32_t push(int32_t i, UErrorCode &status);

    inline int32_t *reserveBlock(int32_t size, UErrorCode &status);
    inline int32_t *popFrame(int32_t size);
};


// UVector32 inlines

inline UBool UVector32::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) {
        return TRUE;
    } else {
        return expandCapacity(minimumCapacity, status);
    }
}

inline int32_t UVector32::elementAti(int32_t index) const {
    return (index >= 0 && count > 0 && count - index > 0) ? elements[index] : 0;
}


inline void UVector32::addElement(int32_t elem, UErrorCode &status) {
    if (ensureCapacity(count + 1, status)) {
        elements[count] = elem;
        count++;
    }
}

inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
    if (ensureCapacity(count+size, status) == FALSE) {
        return NULL;
    }
    int32_t  *rp = elements+count;
    count += size;
    return rp;
}

inline int32_t *UVector32::popFrame(int32_t size) {
    U_ASSERT(count >= size);
    count -= size;
    if (count < 0) {
        count = 0;
    }
    return elements+count-size;
}



inline int32_t UVector32::size(void) const {
    return count;
}

inline UBool UVector32::isEmpty(void) const {
    return count == 0;
}

inline UBool UVector32::contains(int32_t obj) const {
    return indexOf(obj) >= 0;
}

inline int32_t UVector32::lastElementi(void) const {
    return elementAti(count-1);
}

inline UBool UVector32::operator!=(const UVector32& other) {
    return !operator==(other);
}

inline int32_t *UVector32::getBuffer() const {
    return elements;
}


// UStack inlines

inline UBool UVector32::empty(void) const {
    return isEmpty();
}

inline int32_t UVector32::peeki(void) const {
    return lastElementi();
}

inline int32_t UVector32::push(int32_t i, UErrorCode &status) {
    addElement(i, status);
    return i;
}

inline int32_t UVector32::popi(void) {
    int32_t result = 0;
    if (count > 0) {
        count--;
        result = elements[count];
    }
    return result;
}

U_NAMESPACE_END



U_NAMESPACE_BEGIN

BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(static_cast<const uint8_t *>(trieBytes)),
          pos_(bytes_), initialPos_(bytes_),
          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
    // a public API header for which we would want it to depend only on
    // other public headers.
    // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
    // via the CharString and UVector32 implementations, so this additional
    // cost is minimal.
    str_=new CharString();
    stack_=new UVector32(errorCode);
    if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}

BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
          remainingMatchLength_(trie.remainingMatchLength_),
          initialRemainingMatchLength_(trie.remainingMatchLength_),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    str_=new CharString();
    stack_=new UVector32(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(str_==NULL || stack_==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    if(length>=0) {
        // Pending linear-match node, append remaining bytes to str_.
        ++length;
        if(maxLength_>0 && length>maxLength_) {
            length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
        }
        str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
        pos_+=length;
        remainingMatchLength_-=length;
    }
}

BytesTrie::Iterator::~Iterator() {
    delete str_;
    delete stack_;
}

BytesTrie::Iterator &
BytesTrie::Iterator::reset() {
    pos_=initialPos_;
    remainingMatchLength_=initialRemainingMatchLength_;
    int32_t length=remainingMatchLength_+1;  // Remaining match length.
    if(maxLength_>0 && length>maxLength_) {
        length=maxLength_;
    }
    str_->truncate(length);
    pos_+=length;
    remainingMatchLength_-=length;
    stack_->setSize(0);
    return *this;
}

UBool
BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }

UBool
BytesTrie::Iterator::next(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint8_t *pos=pos_;
    if(pos==NULL) {
        if(stack_->isEmpty()) {
            return FALSE;
        }
        // Pop the state off the stack and continue with the next outbound edge of
        // the branch node.
        int32_t stackSize=stack_->size();
        int32_t length=stack_->elementAti(stackSize-1);
        pos=bytes_+stack_->elementAti(stackSize-2);
        stack_->setSize(stackSize-2);
        str_->truncate(length&0xffff);
        length=(int32_t)((uint32_t)length>>16);
        if(length>1) {
            pos=branchNext(pos, length, errorCode);
            if(pos==NULL) {
                return TRUE;  // Reached a final value.
            }
        } else {
            str_->append((char)*pos++, errorCode);
        }
    }
    if(remainingMatchLength_>=0) {
        // We only get here if we started in a pending linear-match node
        // with more than maxLength remaining bytes.
        return truncateAndStop();
    }
    for(;;) {
        int32_t node=*pos++;
        if(node>=kMinValueLead) {
            // Deliver value for the byte sequence so far.
            UBool isFinal=(UBool)(node&kValueIsFinal);
            value_=readValue(pos, node>>1);
            if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
                pos_=NULL;
            } else {
                pos_=skipValue(pos, node);
            }
            return TRUE;
        }
        if(maxLength_>0 && str_->length()==maxLength_) {
            return truncateAndStop();
        }
        if(node<kMinLinearMatch) {
            if(node==0) {
                node=*pos++;
            }
            pos=branchNext(pos, node+1, errorCode);
            if(pos==NULL) {
                return TRUE;  // Reached a final value.
            }
        } else {
            // Linear-match node, append length bytes to str_.
            int32_t length=node-kMinLinearMatch+1;
            if(maxLength_>0 && str_->length()+length>maxLength_) {
                str_->append(reinterpret_cast<const char *>(pos),
                            maxLength_-str_->length(), errorCode);
                return truncateAndStop();
            }
            str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
            pos+=length;
        }
    }
}

StringPiece
BytesTrie::Iterator::getString() const {
    return str_ == NULL ? StringPiece() : str_->toStringPiece();
}

UBool
BytesTrie::Iterator::truncateAndStop() {
    pos_=NULL;
    value_=-1;  // no real value for str
    return TRUE;
}

// Branch node, needs to take the first outbound edge and push state for the rest.
const uint8_t *
BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
    while(length>kMaxBranchLinearSubNodeLength) {
        ++pos;  // ignore the comparison byte
        // Push state for the greater-or-equal edge.
        stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
        stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
        // Follow the less-than edge.
        length>>=1;
        pos=jumpByDelta(pos);
    }
    // List of key-value pairs where values are either final values or jump deltas.
    // Read the first (key, value) pair.
    uint8_t trieByte=*pos++;
    int32_t node=*pos++;
    UBool isFinal=(UBool)(node&kValueIsFinal);
    int32_t value=readValue(pos, node>>1);
    pos=skipValue(pos, node);
    stack_->addElement((int32_t)(pos-bytes_), errorCode);
    stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
    str_->append((char)trieByte, errorCode);
    if(isFinal) {
        pos_=NULL;
        value_=value;
        return NULL;
    } else {
        return pos+value;
    }
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *****************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *****************************************************************************
 */



#if !UCONFIG_NO_NORMALIZATION

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */




#if U_SHOW_CPLUSPLUS_API

#if !UCONFIG_NO_NORMALIZATION




/**
 * \file
 * \brief C++ API: Canonical Iterator
 */
 
/** Should permutation skip characters with combining class zero
 *  Should be either TRUE or FALSE. This is a compile time option
 *  @stable ICU 2.4
 */
#ifndef CANITER_SKIP_ZEROES
#define CANITER_SKIP_ZEROES TRUE
#endif

U_NAMESPACE_BEGIN

class Hashtable;
class Normalizer2;
class Normalizer2Impl;

/**
 * This class allows one to iterate through all the strings that are canonically equivalent to a given
 * string. For example, here are some sample results:
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
1: \\u0041\\u030A\\u0064\\u0307\\u0327
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
2: \\u0041\\u030A\\u0064\\u0327\\u0307
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
3: \\u0041\\u030A\\u1E0B\\u0327
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
4: \\u0041\\u030A\\u1E11\\u0307
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
5: \\u00C5\\u0064\\u0307\\u0327
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
6: \\u00C5\\u0064\\u0327\\u0307
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
7: \\u00C5\\u1E0B\\u0327
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
8: \\u00C5\\u1E11\\u0307
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
9: \\u212B\\u0064\\u0307\\u0327
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
10: \\u212B\\u0064\\u0327\\u0307
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
11: \\u212B\\u1E0B\\u0327
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
12: \\u212B\\u1E11\\u0307
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
 *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
 * since it has not been optimized for that situation.
 * Note, CanonicalIterator is not intended to be subclassed.
 * @author M. Davis
 * @author C++ port by V. Weinstein
 * @stable ICU 2.4
 */
class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
public:
    /**
     * Construct a CanonicalIterator object
     * @param source    string to get results for
     * @param status    Fill-in parameter which receives the status of this operation.
     * @stable ICU 2.4
     */
    CanonicalIterator(const UnicodeString &source, UErrorCode &status);

    /** Destructor
     *  Cleans pieces
     * @stable ICU 2.4
     */
    virtual ~CanonicalIterator();

    /**
     * Gets the NFD form of the current source we are iterating over.
     * @return gets the source: NOTE: it is the NFD form of source
     * @stable ICU 2.4
     */
    UnicodeString getSource();

    /**
     * Resets the iterator so that one can start again from the beginning.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * Get the next canonically equivalent string.
     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
     * @return the next string that is canonically equivalent. A bogus string is returned when
     * the iteration is done.
     * @stable ICU 2.4
     */
    UnicodeString next();

    /**
     * Set a new source for this iterator. Allows object reuse.
     * @param newSource     the source string to iterate against. This allows the same iterator to be used
     *                     while changing the source string, saving object creation.
     * @param status        Fill-in parameter which receives the status of this operation.
     * @stable ICU 2.4
     */
    void setSource(const UnicodeString &newSource, UErrorCode &status);

#ifndef U_HIDE_INTERNAL_API
    /**
     * Dumb recursive implementation of permutation.
     * TODO: optimize
     * @param source     the string to find permutations for
     * @param skipZeros  determine if skip zeros
     * @param result     the results in a set.
     * @param status       Fill-in parameter which receives the status of this operation.
     * @internal
     */
    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
#endif  /* U_HIDE_INTERNAL_API */

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @stable ICU 2.2
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @stable ICU 2.2
     */
    virtual UClassID getDynamicClassID() const;

private:
    // ===================== PRIVATES ==============================
    // private default constructor
    CanonicalIterator();


    /**
     * Copy constructor. Private for now.
     * @internal (private)
     */
    CanonicalIterator(const CanonicalIterator& other);

    /**
     * Assignment operator. Private for now.
     * @internal (private)
     */
    CanonicalIterator& operator=(const CanonicalIterator& other);

    // fields
    UnicodeString source;
    UBool done;

    // 2 dimensional array holds the pieces of the string with
    // their different canonically equivalent representations
    UnicodeString **pieces;
    int32_t pieces_length;
    int32_t *pieces_lengths;

    // current is used in iterating to combine pieces
    int32_t *current;
    int32_t current_length;

    // transient fields
    UnicodeString buffer;

    const Normalizer2 &nfd;
    const Normalizer2Impl &nfcImpl;

    // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)

    //Set getEquivalents2(String segment);
    Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
    //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);

    /**
     * See if the decomposition of cp2 is at segment starting at segmentPos
     * (with canonical rearrangment!)
     * If so, take the remainder, and return the equivalents
     */
    //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
    Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
    //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);

    void cleanPieces();

};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */

#endif /* U_SHOW_CPLUSPLUS_API */


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/


/**
 * \file
 * \brief C++ API: New API for Unicode Normalization.
 */



#if U_SHOW_CPLUSPLUS_API

#if !UCONFIG_NO_NORMALIZATION






U_NAMESPACE_BEGIN

class ByteSink;

/**
 * Unicode normalization functionality for standard Unicode normalization or
 * for using custom mapping tables.
 * All instances of this class are unmodifiable/immutable.
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
 * The Normalizer2 class is not intended for public subclassing.
 *
 * The primary functions are to produce a normalized string and to detect whether
 * a string is already normalized.
 * The most commonly used normalization forms are those defined in
 * http://www.unicode.org/unicode/reports/tr15/
 * However, this API supports additional normalization forms for specialized purposes.
 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
 * and can be used in implementations of UTS #46.
 *
 * Not only are the standard compose and decompose modes supplied,
 * but additional modes are provided as documented in the Mode enum.
 *
 * Some of the functions in this class identify normalization boundaries.
 * At a normalization boundary, the portions of the string
 * before it and starting from it do not interact and can be handled independently.
 *
 * The spanQuickCheckYes() stops at a normalization boundary.
 * When the goal is a normalized string, then the text before the boundary
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
 *
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
 * a character is guaranteed to be at a normalization boundary,
 * regardless of context.
 * This is used for moving from one normalization boundary to the next
 * or preceding boundary, and for performing iterative normalization.
 *
 * Iterative normalization is useful when only a small portion of a
 * longer string needs to be processed.
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
 * (to process only the substring for which sort key bytes are computed).
 *
 * The set of normalization boundaries returned by these functions may not be
 * complete: There may be more boundaries that could be returned.
 * Different functions may return different boundaries.
 * @stable ICU 4.4
 */
class U_COMMON_API Normalizer2 : public UObject {
public:
    /**
     * Destructor.
     * @stable ICU 4.4
     */
    ~Normalizer2();

    /**
     * Returns a Normalizer2 instance for Unicode NFC normalization.
     * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFCInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFD normalization.
     * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFDInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFKC normalization.
     * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKCInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFKD normalization.
     * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKDInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
     * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
     * Returns an unmodifiable singleton instance. Do not delete it.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 49
     */
    static const Normalizer2 *
    getNFKCCasefoldInstance(UErrorCode &errorCode);

    /**
     * Returns a Normalizer2 instance which uses the specified data file
     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
     * and which composes or decomposes text according to the specified mode.
     * Returns an unmodifiable singleton instance. Do not delete it.
     *
     * Use packageName=NULL for data files that are part of ICU's own data.
     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
     *
     * @param packageName NULL for ICU built-in data, otherwise application data package name
     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
     * @param mode normalization mode (compose or decompose etc.)
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the requested Normalizer2, if successful
     * @stable ICU 4.4
     */
    static const Normalizer2 *
    getInstance(const char *packageName,
                const char *name,
                UNormalization2Mode mode,
                UErrorCode &errorCode);

    /**
     * Returns the normalized form of the source string.
     * @param src source string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return normalized src
     * @stable ICU 4.4
     */
    UnicodeString
    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
        UnicodeString result;
        normalize(src, result, errorCode);
        return result;
    }
    /**
     * Writes the normalized form of the source string to the destination string
     * (replacing its contents) and returns the destination string.
     * The source and destination strings must be different objects.
     * @param src source string
     * @param dest destination string; its contents is replaced with normalized src
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              UErrorCode &errorCode) const = 0;

    /**
     * Normalizes a UTF-8 string and optionally records how source substrings
     * relate to changed and unchanged result substrings.
     *
     * Currently implemented completely only for "compose" modes,
     * such as for NFC, NFKC, and NFKC_Casefold
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * Otherwise currently converts to & from UTF-16 and does not support edits.
     *
     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
     *                  This function calls edits->reset() first unless
     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 60
     */
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                  Edits *edits, UErrorCode &errorCode) const;

    /**
     * Appends the normalized form of the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if the first string was normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, will be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const = 0;
    /**
     * Appends the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if both the strings were normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, should be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
           UErrorCode &errorCode) const = 0;

    /**
     * Gets the decomposition mapping of c.
     * Roughly equivalent to normalizing the String form of c
     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
     * returns FALSE and does not write a string
     * if c does not have a decomposition mapping in this instance's data.
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      decomposition mapping, if there is one.
     * @return TRUE if c has a decomposition, otherwise FALSE
     * @stable ICU 4.6
     */
    virtual UBool
    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;

    /**
     * Gets the raw decomposition mapping of c.
     *
     * This is similar to the getDecomposition() method but returns the
     * raw decomposition mapping as specified in UnicodeData.txt or
     * (for custom data) in the mapping files processed by the gennorm2 tool.
     * By contrast, getDecomposition() returns the processed,
     * recursively-decomposed version of this mapping.
     *
     * When used on a standard NFKC Normalizer2 instance,
     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
     *
     * When used on a standard NFC Normalizer2 instance,
     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
     * in this case, the result contains either one or two code points (=1..4 char16_ts).
     *
     * This function is independent of the mode of the Normalizer2.
     * The default implementation returns FALSE.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      raw decomposition mapping, if there is one.
     * @return TRUE if c has a decomposition, otherwise FALSE
     * @stable ICU 49
     */
    virtual UBool
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;

    /**
     * Performs pairwise composition of a & b and returns the composite if there is one.
     *
     * Returns a composite code point c only if c has a two-way mapping to a+b.
     * In standard Unicode normalization, this means that
     * c has a canonical decomposition to a+b
     * and c does not have the Full_Composition_Exclusion property.
     *
     * This function is independent of the mode of the Normalizer2.
     * The default implementation returns a negative value.
     * @param a A (normalization starter) code point.
     * @param b Another code point.
     * @return The non-negative composite code point if there is one; otherwise a negative value.
     * @stable ICU 49
     */
    virtual UChar32
    composePair(UChar32 a, UChar32 b) const;

    /**
     * Gets the combining class of c.
     * The default implementation returns 0
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * @param c code point
     * @return c's combining class
     * @stable ICU 49
     */
    virtual uint8_t
    getCombiningClass(UChar32 c) const;

    /**
     * Tests if the string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return TRUE if s is normalized
     * @stable ICU 4.4
     */
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    /**
     * Tests if the UTF-8 string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     *
     * This works for all normalization modes,
     * but it is currently optimized for UTF-8 only for "compose" modes,
     * such as for NFC, NFKC, and NFKC_Casefold
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * For other modes it currently converts to UTF-16 and calls isNormalized().
     *
     * @param s UTF-8 input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return TRUE if s is normalized
     * @stable ICU 60
     */
    virtual UBool
    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;


    /**
     * Tests if the string is normalized.
     * For the two COMPOSE modes, the result could be "maybe" in cases that
     * would take a little more work to resolve definitively.
     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
     * combination of quick check + normalization, to avoid
     * re-checking the "yes" prefix.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return UNormalizationCheckResult
     * @stable ICU 4.4
     */
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;

    /**
     * Returns the end of the normalized substring of the input string.
     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
     * the substring <code>UnicodeString(s, 0, end)</code>
     * will pass the quick check with a "yes" result.
     *
     * The returned end index is usually one or more characters before the
     * "no" or "maybe" character: The end index is at a normalization boundary.
     * (See the class documentation for more about normalization boundaries.)
     *
     * When the goal is a normalized string and most input strings are expected
     * to be normalized already, then call this method,
     * and if it returns a prefix shorter than the input string,
     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return "yes" span end index
     * @stable ICU 4.4
     */
    virtual int32_t
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;

    /**
     * Tests if the character always has a normalization boundary before it,
     * regardless of context.
     * If true, then the character does not normalization-interact with
     * preceding characters.
     * In other words, a string containing this character can be normalized
     * by processing portions before this character and starting from this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * @param c character to test
     * @return TRUE if c has a normalization boundary before it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;

    /**
     * Tests if the character always has a normalization boundary after it,
     * regardless of context.
     * If true, then the character does not normalization-interact with
     * following characters.
     * In other words, a string containing this character can be normalized
     * by processing portions up to this character and after this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * @param c character to test
     * @return TRUE if c has a normalization boundary after it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;

    /**
     * Tests if the character is normalization-inert.
     * If true, then the character does not change, nor normalization-interact with
     * preceding or following characters.
     * In other words, a string containing this character can be normalized
     * by processing portions before this character and after this
     * character independently.
     * This is used for iterative normalization. See the class documentation for details.
     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * @param c character to test
     * @return TRUE if c is normalization-inert
     * @stable ICU 4.4
     */
    virtual UBool isInert(UChar32 c) const = 0;
};

/**
 * Normalization filtered by a UnicodeSet.
 * Normalizes portions of the text contained in the filter set and leaves
 * portions not contained in the filter set unchanged.
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
 * This class implements all of (and only) the Normalizer2 API.
 * An instance of this class is unmodifiable/immutable but is constructed and
 * must be destructed by the owner.
 * @stable ICU 4.4
 */
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
public:
    /**
     * Constructs a filtered normalizer wrapping any Normalizer2 instance
     * and a filter set.
     * Both are aliased and must not be modified or deleted while this object
     * is used.
     * The filter set should be frozen; otherwise the performance will suffer greatly.
     * @param n2 wrapped Normalizer2 instance
     * @param filterSet UnicodeSet which determines the characters to be normalized
     * @stable ICU 4.4
     */
    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
            norm2(n2), set(filterSet) {}

    /**
     * Destructor.
     * @stable ICU 4.4
     */
    ~FilteredNormalizer2();

    /**
     * Writes the normalized form of the source string to the destination string
     * (replacing its contents) and returns the destination string.
     * The source and destination strings must be different objects.
     * @param src source string
     * @param dest destination string; its contents is replaced with normalized src
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Normalizes a UTF-8 string and optionally records how source substrings
     * relate to changed and unchanged result substrings.
     *
     * Currently implemented completely only for "compose" modes,
     * such as for NFC, NFKC, and NFKC_Casefold
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * Otherwise currently converts to & from UTF-16 and does not support edits.
     *
     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
     * @param src       Source UTF-8 string.
     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
     *                  sink.Flush() is called at the end.
     * @param edits     Records edits for index mapping, working with styled text,
     *                  and getting only changes (if any).
     *                  The Edits contents is undefined if any error occurs.
     *                  This function calls edits->reset() first unless
     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @stable ICU 60
     */
    virtual void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Appends the normalized form of the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if the first string was normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, will be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Appends the second string to the first string
     * (merging them at the boundary) and returns the first string.
     * The result is normalized if both the strings were normalized.
     * The first and second strings must be different objects.
     * @param first string, should be normalized
     * @param second string, should be normalized
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return first
     * @stable ICU 4.4
     */
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
           UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Gets the decomposition mapping of c.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      decomposition mapping, if there is one.
     * @return TRUE if c has a decomposition, otherwise FALSE
     * @stable ICU 4.6
     */
    virtual UBool
    getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;

    /**
     * Gets the raw decomposition mapping of c.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param c code point
     * @param decomposition String object which will be set to c's
     *                      raw decomposition mapping, if there is one.
     * @return TRUE if c has a decomposition, otherwise FALSE
     * @stable ICU 49
     */
    virtual UBool
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;

    /**
     * Performs pairwise composition of a & b and returns the composite if there is one.
     * For details see the base class documentation.
     *
     * This function is independent of the mode of the Normalizer2.
     * @param a A (normalization starter) code point.
     * @param b Another code point.
     * @return The non-negative composite code point if there is one; otherwise a negative value.
     * @stable ICU 49
     */
    virtual UChar32
    composePair(UChar32 a, UChar32 b) const U_OVERRIDE;

    /**
     * Gets the combining class of c.
     * The default implementation returns 0
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * @param c code point
     * @return c's combining class
     * @stable ICU 49
     */
    virtual uint8_t
    getCombiningClass(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the string is normalized.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return TRUE if s is normalized
     * @stable ICU 4.4
     */
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Tests if the UTF-8 string is normalized.
     * Internally, in cases where the quickCheck() method would return "maybe"
     * (which is only possible for the two COMPOSE modes) this method
     * resolves to "yes" or "no" to provide a definitive result,
     * at the cost of doing more work in those cases.
     *
     * This works for all normalization modes,
     * but it is currently optimized for UTF-8 only for "compose" modes,
     * such as for NFC, NFKC, and NFKC_Casefold
     * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
     * For other modes it currently converts to UTF-16 and calls isNormalized().
     *
     * @param s UTF-8 input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return TRUE if s is normalized
     * @stable ICU 60
     */
    virtual UBool
    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Tests if the string is normalized.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return UNormalizationCheckResult
     * @stable ICU 4.4
     */
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
    /**
     * Returns the end of the normalized substring of the input string.
     * For details see the Normalizer2 base class documentation.
     * @param s input string
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return "yes" span end index
     * @stable ICU 4.4
     */
    virtual int32_t
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;

    /**
     * Tests if the character always has a normalization boundary before it,
     * regardless of context.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return TRUE if c has a normalization boundary before it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the character always has a normalization boundary after it,
     * regardless of context.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return TRUE if c has a normalization boundary after it
     * @stable ICU 4.4
     */
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;

    /**
     * Tests if the character is normalization-inert.
     * For details see the Normalizer2 base class documentation.
     * @param c character to test
     * @return TRUE if c is normalization-inert
     * @stable ICU 4.4
     */
    virtual UBool isInert(UChar32 c) const U_OVERRIDE;
private:
    UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              USetSpanCondition spanCondition,
              UErrorCode &errorCode) const;

    void
    normalizeUTF8(uint32_t options, const char *src, int32_t length,
                  ByteSink &sink, Edits *edits,
                  USetSpanCondition spanCondition,
                  UErrorCode &errorCode) const;

    UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UBool doNormalize,
                             UErrorCode &errorCode) const;

    const Normalizer2 &norm2;
    const UnicodeSet &set;
};

U_NAMESPACE_END

#endif  // !UCONFIG_NO_NORMALIZATION

#endif /* U_SHOW_CPLUSPLUS_API */




// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
*/



#if U_SHOW_CPLUSPLUS_API




/**
 * \file 
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
 */

U_NAMESPACE_BEGIN

class UnicodeSet;
class UnicodeString;

/**
 *
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
 *
 * This class is not intended to be subclassed.  Consider any fields
 *  or methods declared as "protected" to be private.  The use of
 *  protected in this class is an artifact of history.
 *
 * <p>To iterate over code points and strings, use a loop like this:
 * <pre>
 * UnicodeSetIterator it(set);
 * while (it.next()) {
 *     processItem(it.getString());
 * }
 * </pre>
 * <p>Each item in the set is accessed as a string.  Set elements
 *    consisting of single code points are returned as strings containing
 *    just the one code point.
 *
 * <p>To iterate over code point ranges, instead of individual code points,
 *    use a loop like this:
 * <pre>
 * UnicodeSetIterator it(set);
 * while (it.nextRange()) {
 *   if (it.isString()) {
 *     processString(it.getString());
 *   } else {
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
 *   }
 * }
 * </pre>
 * @author M. Davis
 * @stable ICU 2.4
 */
class U_COMMON_API UnicodeSetIterator : public UObject {

 protected:

    /**
     * Value of <tt>codepoint</tt> if the iterator points to a string.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     * @stable ICU 2.4
     */
    enum { IS_STRING = -1 };

    /**
     * Current code point, or the special value <tt>IS_STRING</tt>, if
     * the iterator points to a string.
     * @stable ICU 2.4
     */
    UChar32 codepoint;

    /**
     * When iterating over ranges using <tt>nextRange()</tt>,
     * <tt>codepointEnd</tt> contains the inclusive end of the
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     * iterating over code points using <tt>next()</tt>, or if
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.4
     */
    UChar32 codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.4
     */
    const UnicodeString* string;

 public:

    /**
     * Create an iterator over the given set.  The iterator is valid
     * only so long as <tt>set</tt> is valid.
     * @param set set to iterate over
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSet& set);

    /**
     * Create an iterator over nothing.  <tt>next()</tt> and
     * <tt>nextRange()</tt> return false. This is a convenience
     * constructor allowing the target to be set later.
     * @stable ICU 2.4
     */
    UnicodeSetIterator();

    /**
     * Destructor.
     * @stable ICU 2.4
     */
    virtual ~UnicodeSetIterator();

    /**
     * Returns true if the current element is a string.  If so, the
     * caller can retrieve it with <tt>getString()</tt>.  If this
     * method returns false, the current element is a code point or
     * code point range, depending on whether <tt>next()</tt> or
     * <tt>nextRange()</tt> was called.
     * Elements of types string and codepoint can both be retrieved
     * with the function <tt>getString()</tt>.
     * Elements of type codepoint can also be retrieved with
     * <tt>getCodepoint()</tt>.
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
     * of the range.
     * @stable ICU 2.4
     */
    inline UBool isString() const;

    /**
     * Returns the current code point, if <tt>isString()</tt> returned
     * false.  Otherwise returns an undefined result.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepoint() const;

    /**
     * Returns the end of the current code point range, if
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
     * called.  Otherwise returns an undefined result.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepointEnd() const;

    /**
     * Returns the current string, if <tt>isString()</tt> returned
     * true.  If the current iteration item is a code point, a UnicodeString
     * containing that single code point is returned.
     *
     * Ownership of the returned string remains with the iterator.
     * The string is guaranteed to remain valid only until the iterator is
     *   advanced to the next item, or until the iterator is deleted.
     * 
     * @stable ICU 2.4
     */
    const UnicodeString& getString();

    /**
     * Advances the iteration position to the next element in the set, 
     * which can be either a single code point or a string.  
     * If there are no more elements in the set, return false.
     *
     * <p>
     * If <tt>isString() == TRUE</tt>, the value is a
     * string, otherwise the value is a
     * single code point.  Elements of either type can be retrieved
     * with the function <tt>getString()</tt>, while elements of
     * consisting of a single code point can be retrieved with
     * <tt>getCodepoint()</tt>
     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings sorted order.    Do not mix
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
     * calling <tt>reset()</tt> between them.  The results of doing so
     * are undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool next();

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>isString() == TRUE</tt>, the value is a
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
     * range of one or more code points from <tt>getCodepoint()</tt> to
     * <tt>getCodepointeEnd()</tt> inclusive.
     *
     * <p>The order of iteration is all code points ranges in sorted
     * order, followed by all strings sorted order.  Ranges are
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
     * <tt>reset()</tt> between them.  The results of doing so are
     * undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool nextRange();

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>set</tt> is valid.
     * @param set the set to iterate over.
     * @stable ICU 2.4
     */
    void reset(const UnicodeSet& set);

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @stable ICU 2.4
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @stable ICU 2.4
     */
    virtual UClassID getDynamicClassID() const;

    // ======================= PRIVATES ===========================

 protected:

    // endElement and nextElements are really UChar32's, but we keep
    // them as signed int32_t's so we can do comparisons with
    // endElement set to -1.  Leave them as int32_t's.
    /** The set
     * @stable ICU 2.4
     */
    const UnicodeSet* set;
    /** End range
     * @stable ICU 2.4
     */
    int32_t endRange;
    /** Range
     * @stable ICU 2.4
     */
    int32_t range;
    /** End element
     * @stable ICU 2.4
     */
    int32_t endElement;
    /** Next element
     * @stable ICU 2.4
     */
    int32_t nextElement;
    //UBool abbreviated;
    /** Next string
     * @stable ICU 2.4
     */
    int32_t nextString;
    /** String count
     * @stable ICU 2.4
     */
    int32_t stringCount;

    /**
     *  Points to the string to use when the caller asks for a
     *  string and the current iteration item is a code point, not a string.
     *  @internal
     */
    UnicodeString *cpString;

    /** Copy constructor. Disallowed.
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow

    /** Assignment operator. Disallowed.
     * @stable ICU 2.4
     */
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow

    /** Load range
     * @stable ICU 2.4
     */
    virtual void loadRange(int32_t range);

};

inline UBool UnicodeSetIterator::isString() const {
    return codepoint == (UChar32)IS_STRING;
}

inline UChar32 UnicodeSetIterator::getCodepoint() const {
    return codepoint;
}

inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
    return codepointEnd;
}


U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1998-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File ustring.h
*
* Modification History:
*
*   Date        Name        Description
*   12/07/98    bertrand    Creation.
******************************************************************************
*/






/**
 * \def UBRK_TYPEDEF_UBREAK_ITERATOR
 * @internal 
 */

#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
#   define UBRK_TYPEDEF_UBREAK_ITERATOR
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
    typedef struct UBreakIterator UBreakIterator;
#endif

/**
 * \file
 * \brief C API: Unicode string handling functions
 *
 * These C API functions provide general Unicode string handling.
 *
 * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
 * functions. (For example, they do not check for bad arguments like NULL string pointers.)
 * In some cases, only the thread-safe variant of such a function is implemented here
 * (see u_strtok_r()).
 *
 * Other functions provide more Unicode-specific functionality like locale-specific
 * upper/lower-casing and string comparison in code point order.
 *
 * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
 * UTF-16 encodes each Unicode code point with either one or two UChar code units.
 * (This is the default form of Unicode, and a forward-compatible extension of the original,
 * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
 * in 1996.)
 *
 * Some APIs accept a 32-bit UChar32 value for a single code point.
 *
 * ICU also handles 16-bit Unicode text with unpaired surrogates.
 * Such text is not well-formed UTF-16.
 * Code-point-related functions treat unpaired surrogates as surrogate code points,
 * i.e., as separate units.
 *
 * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
 * it is much more efficient even for random access because the code unit values
 * for single-unit characters vs. lead units vs. trail units are completely disjoint.
 * This means that it is easy to determine character (code point) boundaries from
 * random offsets in the string.
 *
 * Unicode (UTF-16) string processing is optimized for the single-unit case.
 * Although it is important to support supplementary characters
 * (which use pairs of lead/trail code units called "surrogates"),
 * their occurrence is rare. Almost all characters in modern use require only
 * a single UChar code unit (i.e., their code point values are <=0xffff).
 *
 * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
 * For a discussion of the handling of unpaired surrogates see also
 * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
 */

/**
 * \defgroup ustring_ustrlen String Length
 * \ingroup ustring_strlen
 */
/*@{*/
/**
 * Determine the length of an array of UChar.
 *
 * @param s The array of UChars, NULL (U+0000) terminated.
 * @return The number of UChars in <code>chars</code>, minus the terminator.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s);
/*@}*/

/**
 * Count Unicode code points in the length UChar code units of the string.
 * A code point may occupy either one or two UChar code units.
 * Counting code points involves reading all code units.
 *
 * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
 *
 * @param s The input string.
 * @param length The number of UChar code units to be checked, or -1 to count all
 *               code points before the first NUL (U+0000).
 * @return The number of code points in the specified code units.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length);

/**
 * Check if the string contains more Unicode code points than a certain number.
 * This is more efficient than counting all code points in the entire string
 * and comparing that number with a threshold.
 * This function may not need to scan the string at all if the length is known
 * (not -1 for NUL-termination) and falls within a certain range, and
 * never needs to count more than 'number+1' code points.
 * Logically equivalent to (u_countChar32(s, length)>number).
 * A Unicode code point may occupy either one or two UChar code units.
 *
 * @param s The input string.
 * @param length The length of the string, or -1 if it is NUL-terminated.
 * @param number The number of code points in the string is compared against
 *               the 'number' parameter.
 * @return Boolean value for whether the string contains more Unicode code points
 *         than 'number'. Same as (u_countChar32(s, length)>number).
 * @stable ICU 2.4
 */
U_STABLE UBool U_EXPORT2
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);

/**
 * Concatenate two ustrings.  Appends a copy of <code>src</code>,
 * including the null terminator, to <code>dst</code>. The initial copied
 * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
 *
 * @param dst The destination string.
 * @param src The source string.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_strcat(UChar     *dst, 
    const UChar     *src);

/**
 * Concatenate two ustrings.  
 * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
 * Adds a terminating NUL.
 * If src is too long, then only <code>n-1</code> characters will be copied
 * before the terminating NUL.
 * If <code>n&lt;=0</code> then dst is not modified.
 *
 * @param dst The destination string.
 * @param src The source string (can be NULL/invalid if n<=0).
 * @param n The maximum number of characters to append; no-op if <=0.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_strncat(UChar     *dst, 
     const UChar     *src, 
     int32_t     n);

/**
 * Find the first occurrence of a substring in a string.
 * The substring is found at code point boundaries.
 * That means that if the substring begins with
 * a trail surrogate or ends with a lead surrogate,
 * then it is found only if these surrogates stand alone in the text.
 * Otherwise, the substring edge units would be matched against
 * halves of surrogate pairs.
 *
 * @param s The string to search (NUL-terminated).
 * @param substring The substring to find (NUL-terminated).
 * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
 *         or <code>s</code> itself if the <code>substring</code> is empty,
 *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable ICU 2.0
 *
 * @see u_strrstr
 * @see u_strFindFirst
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strstr(const UChar *s, const UChar *substring);

/**
 * Find the first occurrence of a substring in a string.
 * The substring is found at code point boundaries.
 * That means that if the substring begins with
 * a trail surrogate or ends with a lead surrogate,
 * then it is found only if these surrogates stand alone in the text.
 * Otherwise, the substring edge units would be matched against
 * halves of surrogate pairs.
 *
 * @param s The string to search.
 * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
 * @param substring The substring to find (NUL-terminated).
 * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
 * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
 *         or <code>s</code> itself if the <code>substring</code> is empty,
 *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strstr
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);

/**
 * Find the first occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (NUL-terminated).
 * @param c The BMP code point to find.
 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.0
 *
 * @see u_strchr32
 * @see u_memchr
 * @see u_strstr
 * @see u_strFindFirst
 */
U_STABLE UChar * U_EXPORT2
u_strchr(const UChar *s, UChar c);

/**
 * Find the first occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (NUL-terminated).
 * @param c The code point to find.
 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.0
 *
 * @see u_strchr
 * @see u_memchr32
 * @see u_strstr
 * @see u_strFindFirst
 */
U_STABLE UChar * U_EXPORT2
u_strchr32(const UChar *s, UChar32 c);

/**
 * Find the last occurrence of a substring in a string.
 * The substring is found at code point boundaries.
 * That means that if the substring begins with
 * a trail surrogate or ends with a lead surrogate,
 * then it is found only if these surrogates stand alone in the text.
 * Otherwise, the substring edge units would be matched against
 * halves of surrogate pairs.
 *
 * @param s The string to search (NUL-terminated).
 * @param substring The substring to find (NUL-terminated).
 * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
 *         or <code>s</code> itself if the <code>substring</code> is empty,
 *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strstr
 * @see u_strFindFirst
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strrstr(const UChar *s, const UChar *substring);

/**
 * Find the last occurrence of a substring in a string.
 * The substring is found at code point boundaries.
 * That means that if the substring begins with
 * a trail surrogate or ends with a lead surrogate,
 * then it is found only if these surrogates stand alone in the text.
 * Otherwise, the substring edge units would be matched against
 * halves of surrogate pairs.
 *
 * @param s The string to search.
 * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
 * @param substring The substring to find (NUL-terminated).
 * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
 * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
 *         or <code>s</code> itself if the <code>substring</code> is empty,
 *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strstr
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);

/**
 * Find the last occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (NUL-terminated).
 * @param c The BMP code point to find.
 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strrchr32
 * @see u_memrchr
 * @see u_strrstr
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strrchr(const UChar *s, UChar c);

/**
 * Find the last occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (NUL-terminated).
 * @param c The code point to find.
 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strrchr
 * @see u_memchr32
 * @see u_strrstr
 * @see u_strFindLast
 */
U_STABLE UChar * U_EXPORT2
u_strrchr32(const UChar *s, UChar32 c);

/**
 * Locates the first occurrence in the string <code>string</code> of any of the characters
 * in the string <code>matchSet</code>.
 * Works just like C's strpbrk but with Unicode.
 *
 * @param string The string in which to search, NUL-terminated.
 * @param matchSet A NUL-terminated string defining a set of code points
 *                 for which to search in the text string.
 * @return A pointer to the  character in <code>string</code> that matches one of the
 *         characters in <code>matchSet</code>, or NULL if no such character is found.
 * @stable ICU 2.0
 */
U_STABLE UChar * U_EXPORT2
u_strpbrk(const UChar *string, const UChar *matchSet);

/**
 * Returns the number of consecutive characters in <code>string</code>,
 * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
 * Works just like C's strcspn but with Unicode.
 *
 * @param string The string in which to search, NUL-terminated.
 * @param matchSet A NUL-terminated string defining a set of code points
 *                 for which to search in the text string.
 * @return The number of initial characters in <code>string</code> that do not
 *         occur in <code>matchSet</code>.
 * @see u_strspn
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strcspn(const UChar *string, const UChar *matchSet);

/**
 * Returns the number of consecutive characters in <code>string</code>,
 * beginning with the first, that occur somewhere in <code>matchSet</code>.
 * Works just like C's strspn but with Unicode.
 *
 * @param string The string in which to search, NUL-terminated.
 * @param matchSet A NUL-terminated string defining a set of code points
 *                 for which to search in the text string.
 * @return The number of initial characters in <code>string</code> that do
 *         occur in <code>matchSet</code>.
 * @see u_strcspn
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strspn(const UChar *string, const UChar *matchSet);

/**
 * The string tokenizer API allows an application to break a string into
 * tokens. Unlike strtok(), the saveState (the current pointer within the
 * original string) is maintained in saveState. In the first call, the
 * argument src is a pointer to the string. In subsequent calls to
 * return successive tokens of that string, src must be specified as
 * NULL. The value saveState is set by this function to maintain the
 * function's position within the string, and on each subsequent call
 * you must give this argument the same variable. This function does
 * handle surrogate pairs. This function is similar to the strtok_r()
 * the POSIX Threads Extension (1003.1c-1995) version.
 *
 * @param src String containing token(s). This string will be modified.
 *            After the first call to u_strtok_r(), this argument must
 *            be NULL to get to the next token.
 * @param delim Set of delimiter characters (Unicode code points).
 * @param saveState The current pointer within the original string,
 *              which is set by this function. The saveState
 *              parameter should the address of a local variable of type
 *              UChar *. (i.e. defined "UChar *myLocalSaveState" and use
 *              &myLocalSaveState for this parameter).
 * @return A pointer to the next token found in src, or NULL
 *         when there are no more tokens.
 * @stable ICU 2.0
 */
U_STABLE UChar * U_EXPORT2
u_strtok_r(UChar    *src, 
     const UChar    *delim,
           UChar   **saveState);

/**
 * Compare two Unicode strings for bitwise equality (code unit order).
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
 * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
 * value if <code>s1</code> is bitwise greater than <code>s2</code>.
 * @stable ICU 2.0
 */
U_STABLE int32_t  U_EXPORT2
u_strcmp(const UChar     *s1, 
         const UChar     *s2);

/**
 * Compare two Unicode strings in code point order.
 * See u_strCompare for details.
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @return a negative/zero/positive integer corresponding to whether
 * the first string is less than/equal to/greater than the second one
 * in code point order
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);

/**
 * Compare two Unicode strings (binary order).
 *
 * The comparison can be done in code unit order or in code point order.
 * They differ only in UTF-16 when
 * comparing supplementary code points (U+10000..U+10ffff)
 * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
 * In code unit order, high BMP code points sort after supplementary code points
 * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
 *
 * This functions works with strings of different explicitly specified lengths
 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
 * NUL-terminated strings are possible with length arguments of -1.
 *
 * @param s1 First source string.
 * @param length1 Length of first source string, or -1 if NUL-terminated.
 *
 * @param s2 Second source string.
 * @param length2 Length of second source string, or -1 if NUL-terminated.
 *
 * @param codePointOrder Choose between code unit order (FALSE)
 *                       and code point order (TRUE).
 *
 * @return <0 or 0 or >0 as usual for string comparisons
 *
 * @stable ICU 2.2
 */
U_STABLE int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             UBool codePointOrder);

/**
 * Compare two Unicode strings (binary order)
 * as presented by UCharIterator objects.
 * Works otherwise just like u_strCompare().
 *
 * Both iterators are reset to their start positions.
 * When the function returns, it is undefined where the iterators
 * have stopped.
 *
 * @param iter1 First source string iterator.
 * @param iter2 Second source string iterator.
 * @param codePointOrder Choose between code unit order (FALSE)
 *                       and code point order (TRUE).
 *
 * @return <0 or 0 or >0 as usual for string comparisons
 *
 * @see u_strCompare
 *
 * @stable ICU 2.6
 */
U_STABLE int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);

/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to
 *   u_strCompare(u_strFoldCase(s1, options),
 *                u_strFoldCase(s2, options),
 *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
 *
 * The comparison can be done in UTF-16 code unit order or in code point order.
 * They differ only when comparing supplementary code points (U+10000..U+10ffff)
 * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
 * In code unit order, high BMP code points sort after supplementary code points
 * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
 *
 * This functions works with strings of different explicitly specified lengths
 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
 * NUL-terminated strings are possible with length arguments of -1.
 *
 * @param s1 First source string.
 * @param length1 Length of first source string, or -1 if NUL-terminated.
 *
 * @param s2 Second source string.
 * @param length2 Length of second source string, or -1 if NUL-terminated.
 *
 * @param options A bit set of options:
 *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 *     Comparison in code unit order with default case folding.
 *
 *   - U_COMPARE_CODE_POINT_ORDER
 *     Set to choose code point order instead of code unit order
 *     (see u_strCompare for details).
 *
 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 *
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                  which must not indicate a failure before the function call.
 *
 * @return <0 or 0 or >0 as usual for string comparisons
 *
 * @stable ICU 2.2
 */
U_STABLE int32_t U_EXPORT2
u_strCaseCompare(const UChar *s1, int32_t length1,
                 const UChar *s2, int32_t length2,
                 uint32_t options,
                 UErrorCode *pErrorCode);

/**
 * Compare two ustrings for bitwise equality. 
 * Compares at most <code>n</code> characters.
 *
 * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
 * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
 * @param n The maximum number of characters to compare; always returns 0 if n<=0.
 * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
 * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
 * value if <code>s1</code> is bitwise greater than <code>s2</code>.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strncmp(const UChar     *ucs1, 
     const UChar     *ucs2, 
     int32_t     n);

/**
 * Compare two Unicode strings in code point order.
 * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
 * For details, see u_strCompare().
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param n The maximum number of characters to compare.
 * @return a negative/zero/positive integer corresponding to whether
 * the first string is less than/equal to/greater than the second one
 * in code point order
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);

/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param options A bit set of options:
 *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 *     Comparison in code unit order with default case folding.
 *
 *   - U_COMPARE_CODE_POINT_ORDER
 *     Set to choose code point order instead of code unit order
 *     (see u_strCompare for details).
 *
 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 *
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);

/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
 * u_strFoldCase(s2, at most n, options)).
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param n The maximum number of characters each string to case-fold and then compare.
 * @param options A bit set of options:
 *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 *     Comparison in code unit order with default case folding.
 *
 *   - U_COMPARE_CODE_POINT_ORDER
 *     Set to choose code point order instead of code unit order
 *     (see u_strCompare for details).
 *
 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 *
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);

/**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
 * u_strFoldCase(s2, n, options)).
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param length The number of characters in each string to case-fold and then compare.
 * @param options A bit set of options:
 *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 *     Comparison in code unit order with default case folding.
 *
 *   - U_COMPARE_CODE_POINT_ORDER
 *     Set to choose code point order instead of code unit order
 *     (see u_strCompare for details).
 *
 *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 *
 * @return A negative, zero, or positive integer indicating the comparison result.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);

/**
 * Copy a ustring. Adds a null terminator.
 *
 * @param dst The destination string.
 * @param src The source string.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_strcpy(UChar     *dst, 
    const UChar     *src);

/**
 * Copy a ustring.
 * Copies at most <code>n</code> characters.  The result will be null terminated
 * if the length of <code>src</code> is less than <code>n</code>.
 *
 * @param dst The destination string.
 * @param src The source string (can be NULL/invalid if n<=0).
 * @param n The maximum number of characters to copy; no-op if <=0.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_strncpy(UChar     *dst, 
     const UChar     *src, 
     int32_t     n);

#if !UCONFIG_NO_CONVERSION

/**
 * Copy a byte string encoded in the default codepage to a ustring.
 * Adds a null terminator.
 * Performs a host byte to UChar conversion
 *
 * @param dst The destination string.
 * @param src The source string.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
               const char *src );

/**
 * Copy a byte string encoded in the default codepage to a ustring.
 * Copies at most <code>n</code> characters.  The result will be null terminated
 * if the length of <code>src</code> is less than <code>n</code>.
 * Performs a host byte to UChar conversion
 *
 * @param dst The destination string.
 * @param src The source string.
 * @param n The maximum number of characters to copy.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
            const char *src,
            int32_t n);

/**
 * Copy ustring to a byte string encoded in the default codepage.
 * Adds a null terminator.
 * Performs a UChar to host byte conversion
 *
 * @param dst The destination string.
 * @param src The source string.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
            const UChar *src );

/**
 * Copy ustring to a byte string encoded in the default codepage.
 * Copies at most <code>n</code> characters.  The result will be null terminated
 * if the length of <code>src</code> is less than <code>n</code>.
 * Performs a UChar to host byte conversion
 *
 * @param dst The destination string.
 * @param src The source string.
 * @param n The maximum number of characters to copy.
 * @return A pointer to <code>dst</code>.
 * @stable ICU 2.0
 */
U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
            const UChar *src,
            int32_t n );

#endif

/**
 * Synonym for memcpy(), but with UChars only.
 * @param dest The destination string
 * @param src The source string (can be NULL/invalid if count<=0)
 * @param count The number of characters to copy; no-op if <=0
 * @return A pointer to <code>dest</code>
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_memcpy(UChar *dest, const UChar *src, int32_t count);

/**
 * Synonym for memmove(), but with UChars only.
 * @param dest The destination string
 * @param src The source string (can be NULL/invalid if count<=0)
 * @param count The number of characters to move; no-op if <=0
 * @return A pointer to <code>dest</code>
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_memmove(UChar *dest, const UChar *src, int32_t count);

/**
 * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
 *
 * @param dest The destination string.
 * @param c The character to initialize the string.
 * @param count The maximum number of characters to set.
 * @return A pointer to <code>dest</code>.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_memset(UChar *dest, UChar c, int32_t count);

/**
 * Compare the first <code>count</code> UChars of each buffer.
 *
 * @param buf1 The first string to compare.
 * @param buf2 The second string to compare.
 * @param count The maximum number of UChars to compare.
 * @return When buf1 < buf2, a negative number is returned.
 *      When buf1 == buf2, 0 is returned.
 *      When buf1 > buf2, a positive number is returned.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);

/**
 * Compare two Unicode strings in code point order.
 * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
 * For details, see u_strCompare().
 *
 * @param s1 A string to compare.
 * @param s2 A string to compare.
 * @param count The maximum number of characters to compare.
 * @return a negative/zero/positive integer corresponding to whether
 * the first string is less than/equal to/greater than the second one
 * in code point order
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);

/**
 * Find the first occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (contains <code>count</code> UChars).
 * @param c The BMP code point to find.
 * @param count The length of the string.
 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.0
 *
 * @see u_strchr
 * @see u_memchr32
 * @see u_strFindFirst
 */
U_STABLE UChar* U_EXPORT2
u_memchr(const UChar *s, UChar c, int32_t count);

/**
 * Find the first occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (contains <code>count</code> UChars).
 * @param c The code point to find.
 * @param count The length of the string.
 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.0
 *
 * @see u_strchr32
 * @see u_memchr
 * @see u_strFindFirst
 */
U_STABLE UChar* U_EXPORT2
u_memchr32(const UChar *s, UChar32 c, int32_t count);

/**
 * Find the last occurrence of a BMP code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (contains <code>count</code> UChars).
 * @param c The BMP code point to find.
 * @param count The length of the string.
 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strrchr
 * @see u_memrchr32
 * @see u_strFindLast
 */
U_STABLE UChar* U_EXPORT2
u_memrchr(const UChar *s, UChar c, int32_t count);

/**
 * Find the last occurrence of a code point in a string.
 * A surrogate code point is found only if its match in the text is not
 * part of a surrogate pair.
 * A NUL character is found at the string terminator.
 *
 * @param s The string to search (contains <code>count</code> UChars).
 * @param c The code point to find.
 * @param count The length of the string.
 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
 *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
 * @stable ICU 2.4
 *
 * @see u_strrchr32
 * @see u_memrchr
 * @see u_strFindLast
 */
U_STABLE UChar* U_EXPORT2
u_memrchr32(const UChar *s, UChar32 c, int32_t count);

/**
 * Unicode String literals in C.
 * We need one macro to declare a variable for the string
 * and to statically preinitialize it if possible,
 * and a second macro to dynamically initialize such a string variable if necessary.
 *
 * The macros are defined for maximum performance.
 * They work only for strings that contain "invariant characters", i.e.,
 * only latin letters, digits, and some punctuation.
 * See utypes.h for details.
 *
 * A pair of macros for a single string must be used with the same
 * parameters.
 * The string parameter must be a C string literal.
 * The length of the string, not including the terminating
 * `NUL`, must be specified as a constant.
 * The U_STRING_DECL macro should be invoked exactly once for one
 * such string variable before it is used.
 *
 * Usage:
 *
 *     U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
 *     U_STRING_DECL(ustringVar2, "jumps 5%", 8);
 *     static UBool didInit=FALSE;
 *
 *     int32_t function() {
 *         if(!didInit) {
 *             U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
 *             U_STRING_INIT(ustringVar2, "jumps 5%", 8);
 *             didInit=TRUE;
 *         }
 *         return u_strcmp(ustringVar1, ustringVar2);
 *     }
 * 
 * Note that the macros will NOT consistently work if their argument is another #`define`.
 * The following will not work on all platforms, don't use it.
 * 
 *     #define GLUCK "Mr. Gluck"
 *     U_STRING_DECL(var, GLUCK, 9)
 *     U_STRING_INIT(var, GLUCK, 9)
 *
 * Instead, use the string literal "Mr. Gluck"  as the argument to both macro
 * calls.
 *
 *
 * @stable ICU 2.0
 */
#if defined(U_DECLARE_UTF16)
#   define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
    /**@stable ICU 2.0 */
#   define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
    /**@stable ICU 2.0 */
#   define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
    /**@stable ICU 2.0 */
#   define U_STRING_INIT(var, cs, length)
#else
#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
    /**@stable ICU 2.0 */
#   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
#endif

/**
 * Unescape a string of characters and write the resulting
 * Unicode characters to the destination buffer.  The following escape
 * sequences are recognized:
 *
 * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
 * \\Uhhhhhhhh   8 hex digits
 * \\xhh         1-2 hex digits
 * \\x{h...}     1-8 hex digits
 * \\ooo         1-3 octal digits; o in [0-7]
 * \\cX          control-X; X is masked with 0x1F
 *
 * as well as the standard ANSI C escapes:
 *
 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
 * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
 *
 * Anything else following a backslash is generically escaped.  For
 * example, "[a\\-z]" returns "[a-z]".
 *
 * If an escape sequence is ill-formed, this method returns an empty
 * string.  An example of an ill-formed sequence is "\\u" followed by
 * fewer than 4 hex digits.
 *
 * The above characters are recognized in the compiler's codepage,
 * that is, they are coded as 'u', '\\', etc.  Characters that are
 * not parts of escape sequences are converted using u_charsToUChars().
 *
 * This function is similar to UnicodeString::unescape() but not
 * identical to it.  The latter takes a source UnicodeString, so it
 * does escape recognition but no conversion.
 *
 * @param src a zero-terminated string of invariant characters
 * @param dest pointer to buffer to receive converted and unescaped
 * text and, if there is room, a zero terminator.  May be NULL for
 * preflighting, in which case no UChars will be written, but the
 * return value will still be valid.  On error, an empty string is
 * stored here (if possible).
 * @param destCapacity the number of UChars that may be written at
 * dest.  Ignored if dest == NULL.
 * @return the length of unescaped string.
 * @see u_unescapeAt
 * @see UnicodeString#unescape()
 * @see UnicodeString#unescapeAt()
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_unescape(const char *src,
           UChar *dest, int32_t destCapacity);

U_CDECL_BEGIN
/**
 * Callback function for u_unescapeAt() that returns a character of
 * the source text given an offset and a context pointer.  The context
 * pointer will be whatever is passed into u_unescapeAt().
 *
 * @param offset pointer to the offset that will be passed to u_unescapeAt().
 * @param context an opaque pointer passed directly into u_unescapeAt()
 * @return the character represented by the escape sequence at
 * offset
 * @see u_unescapeAt
 * @stable ICU 2.0
 */
typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
U_CDECL_END

/**
 * Unescape a single sequence. The character at offset-1 is assumed
 * (without checking) to be a backslash.  This method takes a callback
 * pointer to a function that returns the UChar at a given offset.  By
 * varying this callback, ICU functions are able to unescape char*
 * strings, UnicodeString objects, and UFILE pointers.
 *
 * If offset is out of range, or if the escape sequence is ill-formed,
 * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
 * for a list of recognized sequences.
 *
 * @param charAt callback function that returns a UChar of the source
 * text given an offset and a context pointer.
 * @param offset pointer to the offset that will be passed to charAt.
 * The offset value will be updated upon return to point after the
 * last parsed character of the escape sequence.  On error the offset
 * is unchanged.
 * @param length the number of characters in the source text.  The
 * last character of the source text is considered to be at offset
 * length-1.
 * @param context an opaque pointer passed directly into charAt.
 * @return the character represented by the escape sequence at
 * offset, or (UChar32)0xFFFFFFFF on error.
 * @see u_unescape()
 * @see UnicodeString#unescape()
 * @see UnicodeString#unescapeAt()
 * @stable ICU 2.0
 */
U_STABLE UChar32 U_EXPORT2
u_unescapeAt(UNESCAPE_CHAR_AT charAt,
             int32_t *offset,
             int32_t length,
             void *context);

/**
 * Uppercase the characters in a string.
 * Casing is locale-dependent and context-sensitive.
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest      A buffer for the result string. The result will be zero-terminated if
 *                  the buffer is large enough.
 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
 *                  dest may be NULL and the function will only return the length of the result
 *                  without writing any of the result string.
 * @param src       The original string
 * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
 * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                  which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity. In that case,
 *         only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strToUpper(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode);

/**
 * Lowercase the characters in a string.
 * Casing is locale-dependent and context-sensitive.
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest      A buffer for the result string. The result will be zero-terminated if
 *                  the buffer is large enough.
 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
 *                  dest may be NULL and the function will only return the length of the result
 *                  without writing any of the result string.
 * @param src       The original string
 * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
 * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                  which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity. In that case,
 *         only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strToLower(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode);

#if !UCONFIG_NO_BREAK_ITERATION

/**
 * Titlecase a string.
 * Casing is locale-dependent and context-sensitive.
 * Titlecasing uses a break iterator to find the first characters of words
 * that are to be titlecased. It titlecases those characters and lowercases
 * all others.
 *
 * The titlecase break iterator can be provided to customize for arbitrary
 * styles, using rules and dictionaries beyond the standard iterators.
 * It may be more efficient to always provide an iterator to avoid
 * opening and closing one for each string.
 * The standard titlecase iterator for the root locale implements the
 * algorithm of Unicode TR 21.
 *
 * This function uses only the setText(), first() and next() methods of the
 * provided break iterator.
 *
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest      A buffer for the result string. The result will be zero-terminated if
 *                  the buffer is large enough.
 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
 *                  dest may be NULL and the function will only return the length of the result
 *                  without writing any of the result string.
 * @param src       The original string
 * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
 * @param titleIter A break iterator to find the first characters of words
 *                  that are to be titlecased.
 *                  If none is provided (NULL), then a standard titlecase
 *                  break iterator is opened.
 * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                  which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity. In that case,
 *         only some of the result was written to the destination buffer.
 * @stable ICU 2.1
 */
U_STABLE int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UBreakIterator *titleIter,
             const char *locale,
             UErrorCode *pErrorCode);

#endif

/**
 * Case-folds the characters in a string.
 *
 * Case-folding is locale-independent and not context-sensitive,
 * but there is an option for whether to include or exclude mappings for dotted I
 * and dotless i that are marked with 'T' in CaseFolding.txt.
 *
 * The result may be longer or shorter than the original.
 * The source string and the destination buffer are allowed to overlap.
 *
 * @param dest      A buffer for the result string. The result will be zero-terminated if
 *                  the buffer is large enough.
 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
 *                  dest may be NULL and the function will only return the length of the result
 *                  without writing any of the result string.
 * @param src       The original string
 * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
 * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @param pErrorCode Must be a valid pointer to an error code value,
 *                  which must not indicate a failure before the function call.
 * @return The length of the result string. It may be greater than destCapacity. In that case,
 *         only some of the result was written to the destination buffer.
 * @stable ICU 2.0
 */
U_STABLE int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode);

#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
/**
 * Convert a UTF-16 string to a wchar_t string.
 * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
 * this function simply calls the fast, dedicated function for that.
 * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @stable ICU 2.0
 */
U_STABLE wchar_t* U_EXPORT2
u_strToWCS(wchar_t *dest, 
           int32_t destCapacity,
           int32_t *pDestLength,
           const UChar *src, 
           int32_t srcLength,
           UErrorCode *pErrorCode);
/**
 * Convert a wchar_t string to UTF-16.
 * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
 * this function simply calls the fast, dedicated function for that.
 * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2
u_strFromWCS(UChar   *dest,
             int32_t destCapacity, 
             int32_t *pDestLength,
             const wchar_t *src,
             int32_t srcLength,
             UErrorCode *pErrorCode);
#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */

/**
 * Convert a UTF-16 string to UTF-8.
 * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @stable ICU 2.0
 * @see u_strToUTF8WithSub
 * @see u_strFromUTF8
 */
U_STABLE char* U_EXPORT2 
u_strToUTF8(char *dest,           
            int32_t destCapacity,
            int32_t *pDestLength,
            const UChar *src, 
            int32_t srcLength,
            UErrorCode *pErrorCode);

/**
 * Convert a UTF-8 string to UTF-16.
 * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @stable ICU 2.0
 * @see u_strFromUTF8WithSub
 * @see u_strFromUTF8Lenient
 */
U_STABLE UChar* U_EXPORT2
u_strFromUTF8(UChar *dest,             
              int32_t destCapacity,
              int32_t *pDestLength,
              const char *src, 
              int32_t srcLength,
              UErrorCode *pErrorCode);

/**
 * Convert a UTF-16 string to UTF-8.
 *
 * Same as u_strToUTF8() except for the additional subchar which is output for
 * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param subchar       The substitution character to use in place of an illegal input sequence,
 *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *                      except for surrogate code points (U+D800..U+DFFF).
 *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *                      Set to 0 if no substitutions occur or subchar<0.
 *                      pNumSubstitutions can be NULL.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strToUTF8
 * @see u_strFromUTF8WithSub
 * @stable ICU 3.6
 */
U_STABLE char* U_EXPORT2
u_strToUTF8WithSub(char *dest,
            int32_t destCapacity,
            int32_t *pDestLength,
            const UChar *src,
            int32_t srcLength,
            UChar32 subchar, int32_t *pNumSubstitutions,
            UErrorCode *pErrorCode);

/**
 * Convert a UTF-8 string to UTF-16.
 *
 * Same as u_strFromUTF8() except for the additional subchar which is output for
 * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param subchar       The substitution character to use in place of an illegal input sequence,
 *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *                      except for surrogate code points (U+D800..U+DFFF).
 *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *                      Set to 0 if no substitutions occur or subchar<0.
 *                      pNumSubstitutions can be NULL.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strFromUTF8
 * @see u_strFromUTF8Lenient
 * @see u_strToUTF8WithSub
 * @stable ICU 3.6
 */
U_STABLE UChar* U_EXPORT2
u_strFromUTF8WithSub(UChar *dest,
              int32_t destCapacity,
              int32_t *pDestLength,
              const char *src,
              int32_t srcLength,
              UChar32 subchar, int32_t *pNumSubstitutions,
              UErrorCode *pErrorCode);

/**
 * Convert a UTF-8 string to UTF-16.
 *
 * Same as u_strFromUTF8() except that this function is designed to be very fast,
 * which it achieves by being lenient about malformed UTF-8 sequences.
 * This function is intended for use in environments where UTF-8 text is
 * expected to be well-formed.
 *
 * Its semantics are:
 * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
 * - The function will not read beyond the input string, nor write beyond
 *   the destCapacity.
 * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
 *   be well-formed UTF-16.
 *   The function will resynchronize to valid code point boundaries
 *   within a small number of code points after an illegal sequence.
 * - Non-shortest forms are not detected and will result in "spoofing" output.
 *
 * For further performance improvement, if srcLength is given (>=0),
 * then it must be destCapacity>=srcLength.
 *
 * There is no inverse u_strToUTF8Lenient() function because there is practically
 * no performance gain from not checking that a UTF-16 string is well-formed.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 *                      Unlike for other ICU functions, if srcLength>=0 then it
 *                      must be destCapacity>=srcLength.
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 *                      Unlike for other ICU functions, if srcLength>=0 but
 *                      destCapacity<srcLength, then *pDestLength will be set to srcLength
 *                      (and U_BUFFER_OVERFLOW_ERROR will be set)
 *                      regardless of the actual result length.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strFromUTF8
 * @see u_strFromUTF8WithSub
 * @see u_strToUTF8WithSub
 * @stable ICU 3.6
 */
U_STABLE UChar * U_EXPORT2
u_strFromUTF8Lenient(UChar *dest,
                     int32_t destCapacity,
                     int32_t *pDestLength,
                     const char *src,
                     int32_t srcLength,
                     UErrorCode *pErrorCode);

/**
 * Convert a UTF-16 string to UTF-32.
 * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @see u_strToUTF32WithSub
 * @see u_strFromUTF32
 * @stable ICU 2.0
 */
U_STABLE UChar32* U_EXPORT2 
u_strToUTF32(UChar32 *dest, 
             int32_t  destCapacity,
             int32_t  *pDestLength,
             const UChar *src, 
             int32_t  srcLength,
             UErrorCode *pErrorCode);

/**
 * Convert a UTF-32 string to UTF-16.
 * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Must be a valid pointer to an error code value,
 *                      which must not indicate a failure before the function call.
 * @return The pointer to destination buffer.
 * @see u_strFromUTF32WithSub
 * @see u_strToUTF32
 * @stable ICU 2.0
 */
U_STABLE UChar* U_EXPORT2 
u_strFromUTF32(UChar   *dest,
               int32_t destCapacity, 
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UErrorCode *pErrorCode);

/**
 * Convert a UTF-16 string to UTF-32.
 *
 * Same as u_strToUTF32() except for the additional subchar which is output for
 * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If
 *                      pDestLength!=NULL then *pDestLength is always set to the
 *                      number of output units corresponding to the transformation of
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param subchar       The substitution character to use in place of an illegal input sequence,
 *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *                      except for surrogate code points (U+D800..U+DFFF).
 *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *                      Set to 0 if no substitutions occur or subchar<0.
 *                      pNumSubstitutions can be NULL.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strToUTF32
 * @see u_strFromUTF32WithSub
 * @stable ICU 4.2
 */
U_STABLE UChar32* U_EXPORT2
u_strToUTF32WithSub(UChar32 *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const UChar *src,
             int32_t srcLength,
             UChar32 subchar, int32_t *pNumSubstitutions,
             UErrorCode *pErrorCode);

/**
 * Convert a UTF-32 string to UTF-16.
 *
 * Same as u_strFromUTF32() except for the additional subchar which is output for
 * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
 * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If
 *                      pDestLength!=NULL then *pDestLength is always set to the
 *                      number of output units corresponding to the transformation of
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param subchar       The substitution character to use in place of an illegal input sequence,
 *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *                      except for surrogate code points (U+D800..U+DFFF).
 *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *                      Set to 0 if no substitutions occur or subchar<0.
 *                      pNumSubstitutions can be NULL.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strFromUTF32
 * @see u_strToUTF32WithSub
 * @stable ICU 4.2
 */
U_STABLE UChar* U_EXPORT2
u_strFromUTF32WithSub(UChar *dest,
               int32_t destCapacity,
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UChar32 subchar, int32_t *pNumSubstitutions,
               UErrorCode *pErrorCode);

/**
 * Convert a 16-bit Unicode string to Java Modified UTF-8.
 * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
 *
 * This function behaves according to the documentation for Java DataOutput.writeUTF()
 * except that it does not encode the output length in the destination buffer
 * and does not have an output length restriction.
 * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
 *
 * The input string need not be well-formed UTF-16.
 * (Therefore there is no subchar parameter.)
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @stable ICU 4.4
 * @see u_strToUTF8WithSub
 * @see u_strFromJavaModifiedUTF8WithSub
 */
U_STABLE char* U_EXPORT2 
u_strToJavaModifiedUTF8(
        char *dest,
        int32_t destCapacity,
        int32_t *pDestLength,
        const UChar *src, 
        int32_t srcLength,
        UErrorCode *pErrorCode);

/**
 * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
 * If the input string is not well-formed and no substitution char is specified, 
 * then the U_INVALID_CHAR_FOUND error code is set.
 *
 * This function behaves according to the documentation for Java DataInput.readUTF()
 * except that it takes a length parameter rather than
 * interpreting the first two input bytes as the length.
 * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
 *
 * The output string may not be well-formed UTF-16.
 *
 * @param dest          A buffer for the result string. The result will be zero-terminated if
 *                      the buffer is large enough.
 * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
 *                      dest may be NULL and the function will only return the length of the 
 *                      result without writing any of the result string (pre-flighting).
 * @param pDestLength   A pointer to receive the number of units written to the destination. If 
 *                      pDestLength!=NULL then *pDestLength is always set to the 
 *                      number of output units corresponding to the transformation of 
 *                      all the input units, even in case of a buffer overflow.
 * @param src           The original source string
 * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
 * @param subchar       The substitution character to use in place of an illegal input sequence,
 *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
 *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
 *                      except for surrogate code points (U+D800..U+DFFF).
 *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
 * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
 *                      Set to 0 if no substitutions occur or subchar<0.
 *                      pNumSubstitutions can be NULL.
 * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
 *                      pass the U_SUCCESS() test, or else the function returns
 *                      immediately. Check for U_FAILURE() on output or use with
 *                      function chaining. (See User Guide for details.)
 * @return The pointer to destination buffer.
 * @see u_strFromUTF8WithSub
 * @see u_strFromUTF8Lenient
 * @see u_strToJavaModifiedUTF8
 * @stable ICU 4.4
 */
U_STABLE UChar* U_EXPORT2
u_strFromJavaModifiedUTF8WithSub(
        UChar *dest,
        int32_t destCapacity,
        int32_t *pDestLength,
        const char *src,
        int32_t srcLength,
        UChar32 subchar, int32_t *pNumSubstitutions,
        UErrorCode *pErrorCode);




// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*   Copyright (C) 1997-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
******************************************************************************
*   Date        Name        Description
*   03/28/00    aliu        Creation.
******************************************************************************
*/







U_NAMESPACE_BEGIN

/**
 * Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
 * hashtable implemented in C.  Hashtable is designed to be idiomatic and
 * easy-to-use in C++.
 *
 * Hashtable is an INTERNAL CLASS.
 */
class U_COMMON_API Hashtable : public UMemory {
    UHashtable* hash;
    UHashtable hashObj;

    inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

    inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);

public:
    /**
     * Construct a hashtable
     * @param ignoreKeyCase If true, keys are case insensitive.
     * @param status Error code
    */
    inline Hashtable(UBool ignoreKeyCase, UErrorCode& status);

    /**
     * Construct a hashtable
     * @param ignoreKeyCase If true, keys are case insensitive.
     * @param size initial size allocation
     * @param status Error code
    */
    inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);

    /**
     * Construct a hashtable
     * @param keyComp Comparator for comparing the keys
     * @param valueComp Comparator for comparing the values
     * @param status Error code
    */
    inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

    /**
     * Construct a hashtable
     * @param status Error code
    */
    inline Hashtable(UErrorCode& status);

    /**
     * Construct a hashtable, _disregarding any error_.  Use this constructor
     * with caution.
     */
    inline Hashtable();

    /**
     * Non-virtual destructor; make this virtual if Hashtable is subclassed
     * in the future.
     */
    inline ~Hashtable();

    inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn);

    inline int32_t count() const;

    inline void* put(const UnicodeString& key, void* value, UErrorCode& status);

    inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);

    inline void* get(const UnicodeString& key) const;

    inline int32_t geti(const UnicodeString& key) const;

    inline void* remove(const UnicodeString& key);

    inline int32_t removei(const UnicodeString& key);

    inline void removeAll(void);

    inline const UHashElement* find(const UnicodeString& key) const;

    /**
     * @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
     * @see uhash_nextElement
     */
    inline const UHashElement* nextElement(int32_t& pos) const;

    inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp);

    inline UValueComparator* setValueComparator(UValueComparator* valueComp);

    inline UBool equals(const Hashtable& that) const;
private:
    Hashtable(const Hashtable &other); // forbid copying of this class
    Hashtable &operator=(const Hashtable &other); // forbid copying of this class
};

/*********************************************************************
 * Implementation
 ********************************************************************/

inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
    if (U_SUCCESS(status)) {
        hash = &hashObj;
        uhash_setKeyDeleter(hash, uprv_deleteUObject);
    }
}

inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
    if (U_SUCCESS(status)) {
        hash = &hashObj;
        uhash_setKeyDeleter(hash, uprv_deleteUObject);
    }
}

inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
                 UErrorCode& status) : hash(0) {
    init( uhash_hashUnicodeString, keyComp, valueComp, status);
}

inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
 : hash(0)
{
    init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                        : uhash_hashUnicodeString,
            ignoreKeyCase ? uhash_compareCaselessUnicodeString
                        : uhash_compareUnicodeString,
            NULL,
            status);
}

inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
 : hash(0)
{
    initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                        : uhash_hashUnicodeString,
            ignoreKeyCase ? uhash_compareCaselessUnicodeString
                        : uhash_compareUnicodeString,
            NULL, size,
            status);
}

inline Hashtable::Hashtable(UErrorCode& status)
 : hash(0)
{
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}

inline Hashtable::Hashtable()
 : hash(0)
{
    UErrorCode status = U_ZERO_ERROR;
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}

inline Hashtable::~Hashtable() {
    if (hash != NULL) {
        uhash_close(hash);
    }
}

inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
    return uhash_setValueDeleter(hash, fn);
}

inline int32_t Hashtable::count() const {
    return uhash_count(hash);
}

inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
    return uhash_put(hash, new UnicodeString(key), value, &status);
}

inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
    return uhash_puti(hash, new UnicodeString(key), value, &status);
}

inline void* Hashtable::get(const UnicodeString& key) const {
    return uhash_get(hash, &key);
}

inline int32_t Hashtable::geti(const UnicodeString& key) const {
    return uhash_geti(hash, &key);
}

inline void* Hashtable::remove(const UnicodeString& key) {
    return uhash_remove(hash, &key);
}

inline int32_t Hashtable::removei(const UnicodeString& key) {
    return uhash_removei(hash, &key);
}

inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
    return uhash_find(hash, &key);
}

inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
    return uhash_nextElement(hash, &pos);
}

inline void Hashtable::removeAll(void) {
    uhash_removeAll(hash);
}

inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
    return uhash_setKeyComparator(hash, keyComp);
}

inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
    return uhash_setValueComparator(hash, valueComp);
}

inline UBool Hashtable::equals(const Hashtable& that)const{
   return uhash_equals(hash, that.hash);
}
U_NAMESPACE_END


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2impl.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/




#if !UCONFIG_NO_NORMALIZATION


// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// ucptrie.h (modified from utrie2.h)
// created: 2017dec29 Markus W. Scherer








U_CDECL_BEGIN

/**
 * \file
 *
 * This file defines an immutable Unicode code point trie.
 *
 * @see UCPTrie
 * @see UMutableCPTrie
 */

#ifndef U_IN_DOXYGEN
/** @internal */
typedef union UCPTrieData {
    /** @internal */
    const void *ptr0;
    /** @internal */
    const uint16_t *ptr16;
    /** @internal */
    const uint32_t *ptr32;
    /** @internal */
    const uint8_t *ptr8;
} UCPTrieData;
#endif

/**
 * Immutable Unicode code point trie structure.
 * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
 * For details see http://site.icu-project.org/design/struct/utrie
 *
 * Do not access UCPTrie fields directly; use public functions and macros.
 * Functions are easy to use: They support all trie types and value widths.
 *
 * When performance is really important, macros provide faster access.
 * Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
 * There are "fast" macros for special optimized use cases.
 *
 * The macros will return bogus values, or may crash, if used on the wrong type or value width.
 *
 * @see UMutableCPTrie
 * @stable ICU 63
 */
struct UCPTrie {
#ifndef U_IN_DOXYGEN
    /** @internal */
    const uint16_t *index;
    /** @internal */
    UCPTrieData data;

    /** @internal */
    int32_t indexLength;
    /** @internal */
    int32_t dataLength;
    /** Start of the last range which ends at U+10FFFF. @internal */
    UChar32 highStart;
    /** highStart>>12 @internal */
    uint16_t shifted12HighStart;

    /** @internal */
    int8_t type;  // UCPTrieType
    /** @internal */
    int8_t valueWidth;  // UCPTrieValueWidth

    /** padding/reserved @internal */
    uint32_t reserved32;
    /** padding/reserved @internal */
    uint16_t reserved16;

    /**
     * Internal index-3 null block offset.
     * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
     * @internal
     */
    uint16_t index3NullOffset;
    /**
     * Internal data null block offset, not shifted.
     * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
     * @internal
     */
    int32_t dataNullOffset;
    /** @internal */
    uint32_t nullValue;

#ifdef UCPTRIE_DEBUG
    /** @internal */
    const char *name;
#endif
#endif
};
#ifndef U_IN_DOXYGEN
typedef struct UCPTrie UCPTrie;
#endif

/**
 * Selectors for the type of a UCPTrie.
 * Different trade-offs for size vs. speed.
 *
 * @see umutablecptrie_buildImmutable
 * @see ucptrie_openFromBinary
 * @see ucptrie_getType
 * @stable ICU 63
 */
enum UCPTrieType {
    /**
     * For ucptrie_openFromBinary() to accept any type.
     * ucptrie_getType() will return the actual type.
     * @stable ICU 63
     */
    UCPTRIE_TYPE_ANY = -1,
    /**
     * Fast/simple/larger BMP data structure. Use functions and "fast" macros.
     * @stable ICU 63
     */
    UCPTRIE_TYPE_FAST,
    /**
     * Small/slower BMP data structure. Use functions and "small" macros.
     * @stable ICU 63
     */
    UCPTRIE_TYPE_SMALL
};
#ifndef U_IN_DOXYGEN
typedef enum UCPTrieType UCPTrieType;
#endif

/**
 * Selectors for the number of bits in a UCPTrie data value.
 *
 * @see umutablecptrie_buildImmutable
 * @see ucptrie_openFromBinary
 * @see ucptrie_getValueWidth
 * @stable ICU 63
 */
enum UCPTrieValueWidth {
    /**
     * For ucptrie_openFromBinary() to accept any data value width.
     * ucptrie_getValueWidth() will return the actual data value width.
     * @stable ICU 63
     */
    UCPTRIE_VALUE_BITS_ANY = -1,
    /**
     * The trie stores 16 bits per data value.
     * It returns them as unsigned values 0..0xffff=65535.
     * @stable ICU 63
     */
    UCPTRIE_VALUE_BITS_16,
    /**
     * The trie stores 32 bits per data value.
     * @stable ICU 63
     */
    UCPTRIE_VALUE_BITS_32,
    /**
     * The trie stores 8 bits per data value.
     * It returns them as unsigned values 0..0xff=255.
     * @stable ICU 63
     */
    UCPTRIE_VALUE_BITS_8
};
#ifndef U_IN_DOXYGEN
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
#endif

/**
 * Opens a trie from its binary form, stored in 32-bit-aligned memory.
 * Inverse of ucptrie_toBinary().
 *
 * The memory must remain valid and unchanged as long as the trie is used.
 * You must ucptrie_close() the trie once you are done using it.
 *
 * @param type selects the trie type; results in an
 *             U_INVALID_FORMAT_ERROR if it does not match the binary data;
 *             use UCPTRIE_TYPE_ANY to accept any type
 * @param valueWidth selects the number of bits in a data value; results in an
 *                  U_INVALID_FORMAT_ERROR if it does not match the binary data;
 *                  use UCPTRIE_VALUE_BITS_ANY to accept any data value width
 * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
 * @param length the number of bytes available at data;
 *               can be more than necessary
 * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
 *                      can be NULL
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the trie
 *
 * @see umutablecptrie_open
 * @see umutablecptrie_buildImmutable
 * @see ucptrie_toBinary
 * @stable ICU 63
 */
U_CAPI UCPTrie * U_EXPORT2
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
                       const void *data, int32_t length, int32_t *pActualLength,
                       UErrorCode *pErrorCode);

/**
 * Closes a trie and releases associated memory.
 *
 * @param trie the trie
 * @stable ICU 63
 */
U_CAPI void U_EXPORT2
ucptrie_close(UCPTrie *trie);

/**
 * Returns the trie type.
 *
 * @param trie the trie
 * @return the trie type
 * @see ucptrie_openFromBinary
 * @see UCPTRIE_TYPE_ANY
 * @stable ICU 63
 */
U_CAPI UCPTrieType U_EXPORT2
ucptrie_getType(const UCPTrie *trie);

/**
 * Returns the number of bits in a trie data value.
 *
 * @param trie the trie
 * @return the number of bits in a trie data value
 * @see ucptrie_openFromBinary
 * @see UCPTRIE_VALUE_BITS_ANY
 * @stable ICU 63
 */
U_CAPI UCPTrieValueWidth U_EXPORT2
ucptrie_getValueWidth(const UCPTrie *trie);

/**
 * Returns the value for a code point as stored in the trie, with range checking.
 * Returns the trie error value if c is not in the range 0..U+10FFFF.
 *
 * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
 * Easier to use because, unlike the macros, this function works on all UCPTrie
 * objects, for all types and value widths.
 *
 * @param trie the trie
 * @param c the code point
 * @return the trie value,
 *         or the trie error value if the code point is not in the range 0..U+10FFFF
 * @stable ICU 63
 */
U_CAPI uint32_t U_EXPORT2
ucptrie_get(const UCPTrie *trie, UChar32 c);

/**
 * Returns the last code point such that all those from start to there have the same value.
 * Can be used to efficiently iterate over all same-value ranges in a trie.
 * (This is normally faster than iterating over code points and get()ting each value,
 * but much slower than a data structure that stores ranges directly.)
 *
 * If the UCPMapValueFilter function pointer is not NULL, then
 * the value to be delivered is passed through that function, and the return value is the end
 * of the range where all values are modified to the same actual value.
 * The value is unchanged if that function pointer is NULL.
 *
 * Example:
 * \code
 * UChar32 start = 0, end;
 * uint32_t value;
 * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
 *                                NULL, NULL, &value)) >= 0) {
 *     // Work with the range start..end and its value.
 *     start = end + 1;
 * }
 * \endcode
 *
 * @param trie the trie
 * @param start range start
 * @param option defines whether surrogates are treated normally,
 *               or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
 * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
 * @param filter a pointer to a function that may modify the trie data value,
 *     or NULL if the values from the trie are to be used unmodified
 * @param context an opaque pointer that is passed on to the filter function
 * @param pValue if not NULL, receives the value that every code point start..end has;
 *     may have been modified by filter(context, trie value)
 *     if that function pointer is not NULL
 * @return the range end code point, or -1 if start is not a valid code point
 * @stable ICU 63
 */
U_CAPI UChar32 U_EXPORT2
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
                 UCPMapRangeOption option, uint32_t surrogateValue,
                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue);

/**
 * Writes a memory-mappable form of the trie into 32-bit aligned memory.
 * Inverse of ucptrie_openFromBinary().
 *
 * @param trie the trie
 * @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
 *             can be NULL if capacity==0
 * @param capacity the number of bytes available at data, or 0 for pure preflighting
 * @param pErrorCode an in/out ICU UErrorCode;
 *                   U_BUFFER_OVERFLOW_ERROR if the capacity is too small
 * @return the number of bytes written or (if buffer overflow) needed for the trie
 *
 * @see ucptrie_openFromBinary()
 * @stable ICU 63
 */
U_CAPI int32_t U_EXPORT2
ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);

/**
 * Macro parameter value for a trie with 16-bit data values.
 * Use the name of this macro as a "dataAccess" parameter in other macros.
 * Do not use this macro in any other way.
 *
 * @see UCPTRIE_VALUE_BITS_16
 * @stable ICU 63
 */
#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])

/**
 * Macro parameter value for a trie with 32-bit data values.
 * Use the name of this macro as a "dataAccess" parameter in other macros.
 * Do not use this macro in any other way.
 *
 * @see UCPTRIE_VALUE_BITS_32
 * @stable ICU 63
 */
#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])

/**
 * Macro parameter value for a trie with 8-bit data values.
 * Use the name of this macro as a "dataAccess" parameter in other macros.
 * Do not use this macro in any other way.
 *
 * @see UCPTRIE_VALUE_BITS_8
 * @stable ICU 63
 */
#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])

/**
 * Returns a trie value for a code point, with range checking.
 * Returns the trie error value if c is not in the range 0..U+10FFFF.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point
 * @return The code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))

/**
 * Returns a 16-bit trie value for a code point, with range checking.
 * Returns the trie error value if c is not in the range U+0000..U+10FFFF.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point
 * @return The code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
    dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))

/**
 * UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
 * and gets a value from the trie.
 * Sets the trie error value if c is an unpaired surrogate.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param src (const UChar *, in/out) the source text pointer
 * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
 * @param c (UChar32, out) variable for the code point
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = *(src)++; \
    int32_t __index; \
    if (!U16_IS_SURROGATE(c)) { \
        __index = _UCPTRIE_FAST_INDEX(trie, c); \
    } else { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
            ++(src); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
            __index = _UCPTRIE_SMALL_INDEX(trie, c); \
        } else { \
            __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
        } \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END

/**
 * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
 * and gets a value from the trie.
 * Sets the trie error value if c is an unpaired surrogate.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param start (const UChar *, in) the start pointer for the text
 * @param src (const UChar *, in/out) the source text pointer
 * @param c (UChar32, out) variable for the code point
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = *--(src); \
    int32_t __index; \
    if (!U16_IS_SURROGATE(c)) { \
        __index = _UCPTRIE_FAST_INDEX(trie, c); \
    } else { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
            --(src); \
            (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
            __index = _UCPTRIE_SMALL_INDEX(trie, c); \
        } else { \
            __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
        } \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END

/**
 * UTF-8: Post-increments src and gets a value from the trie.
 * Sets the trie error value for an ill-formed byte sequence.
 *
 * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
 * because it would be more work to do so and is often not needed.
 * If the trie value differs from the error value, then the byte sequence is well-formed,
 * and the code point can be assembled without revalidation.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param src (const char *, in/out) the source text pointer
 * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __lead = (uint8_t)*(src)++; \
    if (!U8_IS_SINGLE(__lead)) { \
        uint8_t __t1, __t2, __t3; \
        if ((src) != (limit) && \
            (__lead >= 0xe0 ? \
                __lead < 0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
                    ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
                    (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
                :  /* U+10000..U+10FFFF */ \
                    (__lead -= 0xf0) <= 4 && \
                    U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
                    (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
                    (__t2 = *(src) - 0x80) <= 0x3f && \
                    ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
                    (__lead = __lead >= (trie)->shifted12HighStart ? \
                        (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
                        ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
            :  /* U+0080..U+07FF */ \
                __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
                (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
            ++(src); \
        } else { \
            __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET;  /* ill-formed*/ \
        } \
    } \
    (result) = dataAccess(trie, __lead); \
} UPRV_BLOCK_MACRO_END

/**
 * UTF-8: Pre-decrements src and gets a value from the trie.
 * Sets the trie error value for an ill-formed byte sequence.
 *
 * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
 * because it would be more work to do so and is often not needed.
 * If the trie value differs from the error value, then the byte sequence is well-formed,
 * and the code point can be assembled without revalidation.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param start (const char *, in) the start pointer for the text
 * @param src (const char *, in/out) the source text pointer
 * @param result (out) variable for the trie lookup result
 * @stable ICU 63
 */
#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __index = (uint8_t)*--(src); \
    if (!U8_IS_SINGLE(__index)) { \
        __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
                                              (const uint8_t *)(src)); \
        (src) -= __index & 7; \
        __index >>= 3; \
    } \
    (result) = dataAccess(trie, __index); \
} UPRV_BLOCK_MACRO_END

/**
 * Returns a trie value for an ASCII code point, without range checking.
 *
 * @param trie (const UCPTrie *, in) the trie (of either fast or small type)
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point; must be U+0000..U+007F
 * @return The ASCII code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)

/**
 * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
 * Can be used to look up a value for a UTF-16 code unit if other parts of
 * the string processing check for surrogates.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
 * @return The BMP code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))

/**
 * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
 * without range checking.
 *
 * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
 * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
 * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
 * @return The supplementary code point's trie value.
 * @stable ICU 63
 */
#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))

/* Internal definitions ----------------------------------------------------- */

#ifndef U_IN_DOXYGEN

/**
 * Internal implementation constants.
 * These are needed for the API macros, but users should not use these directly.
 * @internal
 */
enum {
    /** @internal */
    UCPTRIE_FAST_SHIFT = 6,

    /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
    UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,

    /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
    UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,

    /** @internal */
    UCPTRIE_SMALL_MAX = 0xfff,

    /**
     * Offset from dataLength (to be subtracted) for fetching the
     * value returned for out-of-range code points and ill-formed UTF-8/16.
     * @internal
     */
    UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
    /**
     * Offset from dataLength (to be subtracted) for fetching the
     * value returned for code points highStart..U+10FFFF.
     * @internal
     */
    UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
};

/* Internal functions and macros -------------------------------------------- */
// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API

/** @internal */
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);

/** @internal */
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);

/**
 * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations.
 * Do not call directly.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
                            const uint8_t *start, const uint8_t *src);

/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */
#define _UCPTRIE_FAST_INDEX(trie, c) \
    ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))

/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */
#define _UCPTRIE_SMALL_INDEX(trie, c) \
    ((c) >= (trie)->highStart ? \
        (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
        ucptrie_internalSmallIndex(trie, c))

/**
 * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
 * Returns the data index.
 * @internal
 */
#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
    ((uint32_t)(c) <= (uint32_t)(fastMax) ? \
        _UCPTRIE_FAST_INDEX(trie, c) : \
        (uint32_t)(c) <= 0x10ffff ? \
            _UCPTRIE_SMALL_INDEX(trie, c) : \
            (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)

U_CDECL_END

#endif  // U_IN_DOXYGEN

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUCPTriePointer
 * "Smart pointer" class, closes a UCPTrie via ucptrie_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 63
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);

U_NAMESPACE_END

#endif  // U_SHOW_CPLUSPLUS_API






// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*/
//----------------------------------------------------------------------------
// File:     mutex.h
//
// Lightweight C++ wrapper for umtx_ C mutex functions
//
// Author:   Alan Liu  1/31/97
// History:
// 06/04/97   helena         Updated setImplementation as per feedback from 5/21 drop.
// 04/07/1999  srl               refocused as a thin wrapper
//
//----------------------------------------------------------------------------



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File UMUTEX.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97  aliu        Creation.
*   04/07/99  srl         rewrite - C interface, multiple mutices
*   05/13/99  stephen     Changed to umutex (from cmutex)
******************************************************************************
*/


#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 2001-2014, International Business Machines
*                Corporation and others. All Rights Reserved.
******************************************************************************
*   file name:  uclean.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001July05
*   created by: George Rhoten
*/



/**
 * \file
 * \brief C API: Initialize and clean up ICU
 */
 
/**
 *  Initialize ICU.
 *
 *  Use of this function is optional.  It is OK to simply use ICU
 *  services and functions without first having initialized
 *  ICU by calling u_init().
 *
 *  u_init() will attempt to load some part of ICU's data, and is
 *  useful as a test for configuration or installation problems that
 *  leave the ICU data inaccessible.  A successful invocation of u_init()
 *  does not, however, guarantee that all ICU data is accessible.
 *
 *  Multiple calls to u_init() cause no harm, aside from the small amount
 *  of time required.
 *
 *  In old versions of ICU, u_init() was required in multi-threaded applications
 *  to ensure the thread safety of ICU.  u_init() is no longer needed for this purpose.
 *
 * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
 *    An Error will be returned if some required part of ICU data can not
 *    be loaded or initialized.
 *    The function returns immediately if the input error code indicates a
 *    failure, as usual.
 *
 * @stable ICU 2.6
 */  
U_STABLE void U_EXPORT2 
u_init(UErrorCode *status);

#ifndef U_HIDE_SYSTEM_API
/**
 * Clean up the system resources, such as allocated memory or open files,
 * used in all ICU libraries. This will free/delete all memory owned by the
 * ICU libraries, and return them to their original load state. All open ICU
 * items (collators, resource bundles, converters, etc.) must be closed before
 * calling this function, otherwise ICU may not free its allocated memory
 * (e.g. close your converters and resource bundles before calling this
 * function). Generally, this function should be called once just before
 * an application exits. For applications that dynamically load and unload
 * the ICU libraries (relatively uncommon), u_cleanup() should be called
 * just before the library unload.
 * <p>
 * u_cleanup() also clears any ICU heap functions, mutex functions or
 * trace functions that may have been set for the process.  
 * This has the effect of restoring ICU to its initial condition, before
 * any of these override functions were installed.  Refer to
 * u_setMemoryFunctions(), u_setMutexFunctions and 
 * utrace_setFunctions().  If ICU is to be reinitialized after
 * calling u_cleanup(), these runtime override functions will need to
 * be set up again if they are still required.
 * <p>
 * u_cleanup() is not thread safe.  All other threads should stop using ICU
 * before calling this function.
 * <p>
 * Any open ICU items will be left in an undefined state by u_cleanup(),
 * and any subsequent attempt to use such an item will give unpredictable
 * results.
 * <p>
 * After calling u_cleanup(), an application may continue to use ICU by
 * calling u_init().  An application must invoke u_init() first from one single
 * thread before allowing other threads call u_init().  All threads existing
 * at the time of the first thread's call to u_init() must also call
 * u_init() themselves before continuing with other ICU operations.  
 * <p>
 * The use of u_cleanup() just before an application terminates is optional,
 * but it should be called only once for performance reasons. The primary
 * benefit is to eliminate reports of memory or resource leaks originating
 * in ICU code from the results generated by heap analysis tools.
 * <p>
 * <strong>Use this function with great care!</strong>
 * </p>
 *
 * @stable ICU 2.0
 * @system
 */
U_STABLE void U_EXPORT2 
u_cleanup(void);

U_CDECL_BEGIN
/**
  *  Pointer type for a user supplied memory allocation function.
  *  @param context user supplied value, obtained from u_setMemoryFunctions().
  *  @param size    The number of bytes to be allocated
  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
  *  @stable ICU 2.8
  *  @system
  */
typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
/**
  *  Pointer type for a user supplied memory re-allocation function.
  *  @param context user supplied value, obtained from u_setMemoryFunctions().
  *  @param size    The number of bytes to be allocated
  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
  *  @stable ICU 2.8
  *  @system
  */
typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
/**
  *  Pointer type for a user supplied memory free  function.  Behavior should be
  *  similar the standard C library free().
  *  @param context user supplied value, obtained from u_setMemoryFunctions().
  *  @param mem     Pointer to the memory block to be resized
  *  @param size    The new size for the block
  *  @return        Pointer to the resized memory block, or NULL if the resizing failed.
  *  @stable ICU 2.8
  *  @system
  */
typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);

/**
 *  Set the functions that ICU will use for memory allocation.
 *  Use of this function is optional; by default (without this function), ICU will
 *  use the standard C library malloc() and free() functions.
 *  This function can only be used when ICU is in an initial, unused state, before
 *  u_init() has been called.
 *  @param context This pointer value will be saved, and then (later) passed as
 *                 a parameter to the memory functions each time they
 *                 are called.
 *  @param a       Pointer to a user-supplied malloc function.
 *  @param r       Pointer to a user-supplied realloc function.
 *  @param f       Pointer to a user-supplied free function.
 *  @param status  Receives error values.
 *  @stable ICU 2.8
 *  @system
 */  
U_STABLE void U_EXPORT2 
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, 
                    UErrorCode *status);

U_CDECL_END

#ifndef U_HIDE_DEPRECATED_API
/*********************************************************************************
 *
 * Deprecated Functions
 *
 *    The following functions for user supplied mutexes are no longer supported.
 *    Any attempt to use them will return a U_UNSUPPORTED_ERROR.
 *
 **********************************************************************************/

/**
  * An opaque pointer type that represents an ICU mutex.
  * For user-implemented mutexes, the value will typically point to a
  *  struct or object that implements the mutex.
  * @deprecated ICU 52. This type is no longer supported.
  * @system
  */
typedef void *UMTX;

U_CDECL_BEGIN
/**
  *  Function Pointer type for a user supplied mutex initialization function.
  *  The user-supplied function will be called by ICU whenever ICU needs to create a
  *  new mutex.  The function implementation should create a mutex, and store a pointer
  *  to something that uniquely identifies the mutex into the UMTX that is supplied
  *  as a parameter.
  *  @param context user supplied value, obtained from u_setMutexFunctions().
  *  @param mutex   Receives a pointer that identifies the new mutex.
  *                 The mutex init function must set the UMTX to a non-null value.   
  *                 Subsequent calls by ICU to lock, unlock, or destroy a mutex will 
  *                 identify the mutex by the UMTX value.
  *  @param status  Error status.  Report errors back to ICU by setting this variable
  *                 with an error code.
  *  @deprecated ICU 52. This function is no longer supported.
  *  @system
  */
typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX  *mutex, UErrorCode* status);


/**
  *  Function Pointer type for a user supplied mutex functions.
  *  One of the  user-supplied functions with this signature will be called by ICU
  *  whenever ICU needs to lock, unlock, or destroy a mutex.
  *  @param context user supplied value, obtained from u_setMutexFunctions().
  *  @param mutex   specify the mutex on which to operate.
  *  @deprecated ICU 52. This function is no longer supported.
  *  @system
  */
typedef void U_CALLCONV UMtxFn   (const void *context, UMTX  *mutex);
U_CDECL_END

/**
  *  Set the functions that ICU will use for mutex operations
  *  Use of this function is optional; by default (without this function), ICU will
  *  directly access system functions for mutex operations
  *  This function can only be used when ICU is in an initial, unused state, before
  *  u_init() has been called.
  *  @param context This pointer value will be saved, and then (later) passed as
  *                 a parameter to the user-supplied mutex functions each time they
  *                 are called. 
  *  @param init    Pointer to a mutex initialization function.  Must be non-null.
  *  @param destroy Pointer to the mutex destroy function.  Must be non-null.
  *  @param lock    pointer to the mutex lock function.  Must be non-null.
  *  @param unlock  Pointer to the mutex unlock function.  Must be non-null.
  *  @param status  Receives error values.
  *  @deprecated ICU 52. This function is no longer supported.
  *  @system
  */  
U_DEPRECATED void U_EXPORT2 
u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
                    UErrorCode *status);


/**
  *  Pointer type for a user supplied atomic increment or decrement function.
  *  @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
  *  @param p   Pointer to a 32 bit int to be incremented or decremented
  *  @return    The value of the variable after the inc or dec operation.
  *  @deprecated ICU 52. This function is no longer supported.
  *  @system
  */
typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);

/**
 *  Set the functions that ICU will use for atomic increment and decrement of int32_t values.
 *  Use of this function is optional; by default (without this function), ICU will
 *  use its own internal implementation of atomic increment/decrement.
 *  This function can only be used when ICU is in an initial, unused state, before
 *  u_init() has been called.
 *  @param context This pointer value will be saved, and then (later) passed as
 *                 a parameter to the increment and decrement functions each time they
 *                 are called.  This function can only be called 
 *  @param inc     Pointer to a function to do an atomic increment operation.  Must be non-null.
 *  @param dec     Pointer to a function to do an atomic decrement operation.  Must be non-null.
 *  @param status  Receives error values.
 *  @deprecated ICU 52. This function is no longer supported.
 *  @system
 */  
U_DEPRECATED void U_EXPORT2 
u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
                    UErrorCode *status);

#endif  /* U_HIDE_DEPRECATED_API */
#endif  /* U_HIDE_SYSTEM_API */




// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
*  FILE NAME : putilimp.h
*
*   Date        Name        Description
*   10/17/04    grhoten     Move internal functions from putil.h to this file.
******************************************************************************
*/





/**
 * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
 * Nearly all CPUs and compilers implement a right-shift of a signed integer
 * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
 * into the vacated bits (sign extension).
 * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
 *
 * This can be useful for storing a signed value in the upper bits
 * and another bit field in the lower bits.
 * The signed value can be retrieved by simple right-shifting.
 *
 * This is consistent with the Java language.
 *
 * However, the C standard allows compilers to implement a right-shift of a signed integer
 * as a Logical Shift Right which copies a 0 into the vacated bits.
 * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
 *
 * Code that depends on the natural behavior should be guarded with this macro,
 * with an alternate path for unusual platforms.
 * @internal
 */
#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
    /* Use the predefined value. */
#else
    /*
     * Nearly all CPUs & compilers implement a right-shift of a signed integer
     * as an Arithmetic Shift Right (with sign extension).
     */
#   define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
#endif

/** Define this to 1 if your platform supports IEEE 754 floating point,
   to 0 if it does not. */
#ifndef IEEE_754
#   define IEEE_754 1
#endif

/**
 * uintptr_t is an optional part of the standard definitions in stdint.h.
 * The opengroup.org documentation for stdint.h says
 * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
 * otherwise, they are optional."
 * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
 *
 * Do not use ptrdiff_t since it is signed. size_t is unsigned.
 */
/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
typedef size_t uintptr_t;
#endif

/*===========================================================================*/
/** @{ Information about POSIX support                                       */
/*===========================================================================*/

#ifdef U_HAVE_NL_LANGINFO_CODESET
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
#   define U_HAVE_NL_LANGINFO_CODESET 0
#else
#   define U_HAVE_NL_LANGINFO_CODESET 1
#endif

#ifdef U_NL_LANGINFO_CODESET
    /* Use the predefined value. */
#elif !U_HAVE_NL_LANGINFO_CODESET
#   define U_NL_LANGINFO_CODESET -1
#elif U_PLATFORM == U_PF_OS400
   /* not defined */
#else
#   define U_NL_LANGINFO_CODESET CODESET
#endif

#if defined(U_TZSET) || defined(U_HAVE_TZSET)
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
    // UWP doesn't support tzset or environment variables for tz
#if U_PLATFORM_HAS_WINUWP_API == 0
#   define U_TZSET _tzset
#endif
#elif U_PLATFORM == U_PF_OS400
   /* not defined */
#else
#   define U_TZSET tzset
#endif

#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
    /* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID
#   define U_TIMEZONE timezone
#elif defined(__UCLIBC__)
    // uClibc does not have __timezone or _timezone.
#elif defined(_NEWLIB_VERSION)
#   define U_TIMEZONE _timezone
#elif defined(__GLIBC__)
    // glibc
#   define U_TIMEZONE __timezone
#elif U_PLATFORM_IS_LINUX_BASED
    // not defined
#elif U_PLATFORM_USES_ONLY_WIN32_API
#   define U_TIMEZONE _timezone
#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
   /* not defined */
#elif U_PLATFORM == U_PF_OS400
   /* not defined */
#elif U_PLATFORM == U_PF_IPHONE
   /* not defined */
#else
#   define U_TIMEZONE timezone
#endif

#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
    /* not usable on all windows platforms */
#if U_PLATFORM_HAS_WINUWP_API == 0
#   define U_TZNAME _tzname
#endif
#elif U_PLATFORM == U_PF_OS400
   /* not defined */
#else
#   define U_TZNAME tzname
#endif

#ifdef U_HAVE_MMAP
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
#   define U_HAVE_MMAP 0
#else
#   define U_HAVE_MMAP 1
#endif

#ifdef U_HAVE_POPEN
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
#   define U_HAVE_POPEN 0
#elif U_PLATFORM == U_PF_OS400
#   define U_HAVE_POPEN 0
#else
#   define U_HAVE_POPEN 1
#endif

/**
 * \def U_HAVE_DIRENT_H
 * Defines whether dirent.h is available.
 * @internal
 */
#ifdef U_HAVE_DIRENT_H
    /* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
#   define U_HAVE_DIRENT_H 0
#else
#   define U_HAVE_DIRENT_H 1
#endif

/** @} */

/*===========================================================================*/
/** @{ Programs used by ICU code                                             */
/*===========================================================================*/

/**
 * \def U_MAKE_IS_NMAKE
 * Defines whether the "make" program is Windows nmake.
 */
#ifdef U_MAKE_IS_NMAKE
    /* Use the predefined value. */
#elif U_PLATFORM == U_PF_WINDOWS
#   define U_MAKE_IS_NMAKE 1
#else
#   define U_MAKE_IS_NMAKE 0
#endif

/** @} */

/*==========================================================================*/
/* Platform utilities                                                       */
/*==========================================================================*/

/**
 * Platform utilities isolates the platform dependencies of the
 * library.  For each platform which this code is ported to, these
 * functions may have to be re-implemented.
 */

/**
 * Floating point utility to determine if a double is Not a Number (NaN).
 * @internal
 */
U_INTERNAL UBool   U_EXPORT2 uprv_isNaN(double d);
/**
 * Floating point utility to determine if a double has an infinite value.
 * @internal
 */
U_INTERNAL UBool   U_EXPORT2 uprv_isInfinite(double d);
/**
 * Floating point utility to determine if a double has a positive infinite value.
 * @internal
 */
U_INTERNAL UBool   U_EXPORT2 uprv_isPositiveInfinity(double d);
/**
 * Floating point utility to determine if a double has a negative infinite value.
 * @internal
 */
U_INTERNAL UBool   U_EXPORT2 uprv_isNegativeInfinity(double d);
/**
 * Floating point utility that returns a Not a Number (NaN) value.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_getNaN(void);
/**
 * Floating point utility that returns an infinite value.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_getInfinity(void);

/**
 * Floating point utility to truncate a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_trunc(double d);
/**
 * Floating point utility to calculate the floor of a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_floor(double d);
/**
 * Floating point utility to calculate the ceiling of a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_ceil(double d);
/**
 * Floating point utility to calculate the absolute value of a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_fabs(double d);
/**
 * Floating point utility to calculate the fractional and integer parts of a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_modf(double d, double* pinteger);
/**
 * Floating point utility to calculate the remainder of a double divided by another double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_fmod(double d, double y);
/**
 * Floating point utility to calculate d to the power of exponent (d^exponent).
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_pow(double d, double exponent);
/**
 * Floating point utility to calculate 10 to the power of exponent (10^exponent).
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_pow10(int32_t exponent);
/**
 * Floating point utility to calculate the maximum value of two doubles.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_fmax(double d, double y);
/**
 * Floating point utility to calculate the minimum value of two doubles.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_fmin(double d, double y);
/**
 * Private utility to calculate the maximum value of two integers.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
/**
 * Private utility to calculate the minimum value of two integers.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);

#if U_IS_BIG_ENDIAN
#   define uprv_isNegative(number) (*((signed char *)&(number))<0)
#else
#   define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
#endif

/**
 * Return the largest positive number that can be represented by an integer
 * type of arbitrary bit length.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_maxMantissa(void);

/**
 * Floating point utility to calculate the logarithm of a double.
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_log(double d);

/**
 * Does common notion of rounding e.g. uprv_floor(x + 0.5);
 * @param x the double number
 * @return the rounded double
 * @internal
 */
U_INTERNAL double  U_EXPORT2 uprv_round(double x);

/**
 * Adds the signed integers a and b, storing the result in res.
 * Checks for signed integer overflow.
 * Similar to the GCC/Clang extension __builtin_add_overflow
 *
 * @param a The first operand.
 * @param b The second operand.
 * @param res a + b
 * @return true if overflow occurred; false if no overflow occurred.
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);

/**
 * Multiplies the signed integers a and b, storing the result in res.
 * Checks for signed integer overflow.
 * Similar to the GCC/Clang extension __builtin_mul_overflow
 *
 * @param a The first multiplicand.
 * @param b The second multiplicand.
 * @param res a * b
 * @return true if overflow occurred; false if no overflow occurred.
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res);

#if 0
/**
 * Returns the number of digits after the decimal point in a double number x.
 *
 * @param x the double number
 * @return the number of digits after the decimal point in a double number x.
 * @internal
 */
/*U_INTERNAL int32_t  U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
#endif

#if !U_CHARSET_IS_UTF8
/**
 * Please use ucnv_getDefaultName() instead.
 * Return the default codepage for this platform and locale.
 * This function can call setlocale() on Unix platforms. Please read the
 * platform documentation on setlocale() before calling this function.
 * @return the default codepage for this platform
 * @internal
 */
U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultCodepage(void);
#endif

/**
 * Please use uloc_getDefault() instead.
 * Return the default locale ID string by querying the system, or
 *     zero if one cannot be found.
 * This function can call setlocale() on Unix platforms. Please read the
 * platform documentation on setlocale() before calling this function.
 * @return the default locale ID string
 * @internal
 */
U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultLocaleID(void);

/**
 * Time zone utilities
 *
 * Wrappers for C runtime library functions relating to timezones.
 * The t_tzset() function (similar to tzset) uses the current setting
 * of the environment variable TZ to assign values to three global
 * variables: daylight, timezone, and tzname. These variables have the
 * following meanings, and are declared in &lt;time.h&gt;.
 *
 *   daylight   Nonzero if daylight-saving-time zone (DST) is specified
 *              in TZ; otherwise, 0. Default value is 1.
 *   timezone   Difference in seconds between coordinated universal
 *              time and local time. E.g., -28,800 for PST (GMT-8hrs)
 *   tzname(0)  Three-letter time-zone name derived from TZ environment
 *              variable. E.g., "PST".
 *   tzname(1)  Three-letter DST zone name derived from TZ environment
 *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
 *              tzname(1) is an empty string.
 *
 * Notes: For example, to set the TZ environment variable to correspond
 * to the current time zone in Germany, you can use one of the
 * following statements:
 *
 *   set TZ=GST1GDT
 *   set TZ=GST+1GDT
 *
 * If the TZ value is not set, t_tzset() attempts to use the time zone
 * information specified by the operating system. Under Windows NT
 * and Windows 95, this information is specified in the Control Panel's
 * Date/Time application.
 * @internal
 */
U_INTERNAL void     U_EXPORT2 uprv_tzset(void);

/**
 * Difference in seconds between coordinated universal
 * time and local time. E.g., -28,800 for PST (GMT-8hrs)
 * @return the difference in seconds between coordinated universal time and local time.
 * @internal
 */
U_INTERNAL int32_t  U_EXPORT2 uprv_timezone(void);

/**
 *   tzname(0)  Three-letter time-zone name derived from TZ environment
 *              variable. E.g., "PST".
 *   tzname(1)  Three-letter DST zone name derived from TZ environment
 *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
 *              tzname(1) is an empty string.
 * @internal
 */
U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);

/**
 * Reset the global tzname cache.
 * @internal
 */
U_INTERNAL void uprv_tzname_clear_cache();

/**
 * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
 * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
 * @return the UTC time measured in milliseconds
 * @internal
 */
U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);

/**
 * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
 * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
 * exposes time to the end user.
 * @return the UTC time measured in milliseconds
 * @internal
 */
U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void);

/**
 * Determine whether a pathname is absolute or not, as defined by the platform.
 * @param path Pathname to test
 * @return TRUE if the path is absolute
 * @internal (ICU 3.0)
 */
U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);

/**
 * Use U_MAX_PTR instead of this function.
 * @param void pointer to test
 * @return the largest possible pointer greater than the base
 * @internal (ICU 3.8)
 */
U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);

/**
 * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
 * In fact, buffer sizes must not exceed 2GB so that the difference between
 * the buffer limit and the buffer start can be expressed in an int32_t.
 *
 * The definition of U_MAX_PTR must fulfill the following conditions:
 * - return the largest possible pointer greater than base
 * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
 * - avoid wrapping around at high addresses
 * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
 *
 * @param base The beginning of a buffer to find the maximum offset from
 * @internal
 */
#ifndef U_MAX_PTR
#  if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
    /* We have 31-bit pointers. */
#    define U_MAX_PTR(base) ((void *)0x7fffffff)
#  elif U_PLATFORM == U_PF_OS400
#    define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
#  elif 0
    /*
     * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
     * but that do not define uintptr_t.
     *
     * However, this does not work on modern compilers:
     * The C++ standard does not define pointer overflow, and allows compilers to
     * assume that p+u>p for any pointer p and any integer u>0.
     * Thus, modern compilers optimize away the ">" comparison.
     * (See ICU tickets #7187 and #8096.)
     */
#    define U_MAX_PTR(base) \
    ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
        ? ((char *)(base)+0x7fffffffu) \
        : (char *)-1))
#  else
    /* Default version. C++ standard compliant for scalar pointers. */
#    define U_MAX_PTR(base) \
    ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
        ? ((uintptr_t)(base)+0x7fffffffu) \
        : (uintptr_t)-1))
#  endif
#endif


#ifdef __cplusplus
/**
 * Pin a buffer capacity such that doing pointer arithmetic
 * on the destination pointer and capacity cannot overflow.
 *
 * The pinned capacity must fulfill the following conditions (for positive capacities):
 *   - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.)
 *   - (dest + capacity) >= dest
 *   - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
 *
 * @param dest the destination buffer pointer.
 * @param capacity the requested buffer capacity, in units of type T.
 * @return the pinned capacity.
 * @internal
 */
template <typename T>
inline int32_t pinCapacity(T *dest, int32_t capacity) {
    if (capacity <= 0) { return capacity; }

    uintptr_t destInt = (uintptr_t)dest;
    uintptr_t maxInt;

#  if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
    // We have 31-bit pointers.
    maxInt = 0x7fffffff;
#  elif U_PLATFORM == U_PF_OS400
    maxInt = (uintptr_t)uprv_maximumPtr((void *)dest);
#  else
    maxInt = destInt + 0x7fffffffu;
    if (maxInt < destInt) {
        // Less than 2GB to the end of the address space.
        // Pin to that to prevent address overflow.
        maxInt = (uintptr_t)-1;
    }
#  endif

    uintptr_t maxBytes = maxInt - destInt;  // max. 2GB
    int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T));
    return capacity <= maxCapacity ? capacity : maxCapacity;
}
#endif   // __cplusplus

/*  Dynamic Library Functions */

typedef void (UVoidFunction)(void);

/**
 * Define malloc and related functions
 * @internal
 */
#if U_PLATFORM == U_PF_OS400
# define uprv_default_malloc(x) _C_TS_malloc(x)
# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
# define uprv_default_free(x) _C_TS_free(x)
/* also _C_TS_calloc(x) */
#else
/* C defaults */
# define uprv_default_malloc(x) malloc(x)
# define uprv_default_realloc(x,y) realloc(x,y)
# define uprv_default_free(x) free(x)
#endif




#if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H)
// Support for including an alternate implementation of atomic & mutex operations has been withdrawn.
// See issue ICU-20185.
#error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
#endif

// Export an explicit template instantiation of std::atomic<int32_t>. 
// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
//
// Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
#if defined(__clang__) || defined(_MSC_VER)
  #if defined(__clang__)
    // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
  #endif
template struct U_COMMON_API std::atomic<int32_t>;
template struct U_COMMON_API std::atomic<std::mutex *>;
  #if defined(__clang__)
    #pragma clang diagnostic pop
  #endif
#elif defined(__GNUC__)
// For GCC this class is already exported/visible, so no need for U_COMMON_API.
template struct std::atomic<int32_t>;
template struct std::atomic<std::mutex *>;
#endif
#endif


U_NAMESPACE_BEGIN

/****************************************************************************
 *
 *   Low Level Atomic Operations, ICU wrappers for.
 *
 ****************************************************************************/

typedef std::atomic<int32_t> u_atomic_int32_t;
#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)

inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
    return var.load(std::memory_order_acquire);
}

inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
    var.store(val, std::memory_order_release);
}

inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
    return var->fetch_add(1) + 1;
}

inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
    return var->fetch_sub(1) - 1;
}


/*************************************************************************************************
 *
 *  UInitOnce Definitions.
 *
 *************************************************************************************************/

struct UInitOnce {
    u_atomic_int32_t   fState;
    UErrorCode       fErrCode;
    void reset() {fState = 0;}
    UBool isReset() {return umtx_loadAcquire(fState) == 0;}
// Note: isReset() is used by service registration code.
//                 Thread safety of this usage needs review.
};

#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}


U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
U_COMMON_API void  U_EXPORT2 umtx_initImplPostInit(UInitOnce &);

template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) {
    if (umtx_loadAcquire(uio.fState) == 2) {
        return;
    }
    if (umtx_initImplPreInit(uio)) {
        (obj->*fp)();
        umtx_initImplPostInit(uio);
    }
}


// umtx_initOnce variant for plain functions, or static class functions.
//               No context parameter.
inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) {
    if (umtx_loadAcquire(uio.fState) == 2) {
        return;
    }
    if (umtx_initImplPreInit(uio)) {
        (*fp)();
        umtx_initImplPostInit(uio);
    }
}

// umtx_initOnce variant for plain functions, or static class functions.
//               With ErrorCode, No context parameter.
inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) {
    if (U_FAILURE(errCode)) {
        return;
    }
    if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
        // We run the initialization.
        (*fp)(errCode);
        uio.fErrCode = errCode;
        umtx_initImplPostInit(uio);
    } else {
        // Someone else already ran the initialization.
        if (U_FAILURE(uio.fErrCode)) {
            errCode = uio.fErrCode;
        }
    }
}

// umtx_initOnce variant for plain functions, or static class functions,
//               with a context parameter.
template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) {
    if (umtx_loadAcquire(uio.fState) == 2) {
        return;
    }
    if (umtx_initImplPreInit(uio)) {
        (*fp)(context);
        umtx_initImplPostInit(uio);
    }
}

// umtx_initOnce variant for plain functions, or static class functions,
//               with a context parameter and an error code.
template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
    if (U_FAILURE(errCode)) {
        return;
    }
    if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
        // We run the initialization.
        (*fp)(context, errCode);
        uio.fErrCode = errCode;
        umtx_initImplPostInit(uio);
    } else {
        // Someone else already ran the initialization.
        if (U_FAILURE(uio.fErrCode)) {
            errCode = uio.fErrCode;
        }
    }
}

// UMutex should be constexpr-constructible, so that no initialization code
// is run during startup.
// This works on all C++ libraries except MS VS before VS2019.
#if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \
    (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142)
    // (VS std lib older than VS2017) || (VS std lib version < VS2019)
#   define UMUTEX_CONSTEXPR
#else
#   define UMUTEX_CONSTEXPR constexpr
#endif

/**
 * UMutex - ICU Mutex class.
 *
 * This is the preferred Mutex class for use within ICU implementation code.
 * It is a thin wrapper over C++ std::mutex, with these additions:
 *    - Static instances are safe, not triggering static construction or destruction,
 *      and the associated order of construction or destruction issues.
 *    - Plumbed into u_cleanup() for destructing the underlying std::mutex,
 *      which frees any OS level resources they may be holding.
 *
 * Limitations:
 *    - Static or global instances only. Cannot be heap allocated. Cannot appear as a
 *      member of another class.
 *    - No condition variables or other advanced features. If needed, you will need to use
 *      std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp
 *
 * Typical Usage:
 *    static UMutex myMutex;
 *
 *    {
 *       Mutex lock(myMutex);
 *       ...    // Do stuff that is protected by myMutex;
 *    }         // myMutex is released when lock goes out of scope.
 */

class U_COMMON_API UMutex {
public:
    UMUTEX_CONSTEXPR UMutex() {}
    ~UMutex() = default;

    UMutex(const UMutex &other) = delete;
    UMutex &operator =(const UMutex &other) = delete;
    void *operator new(size_t) = delete;

    // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
    void lock() {
        std::mutex *m = fMutex.load(std::memory_order_acquire);
        if (m == nullptr) { m = getMutex(); }
        m->lock();
    }
    void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }

    static void cleanup();

private:
    alignas(std::mutex) char fStorage[sizeof(std::mutex)] {};
    std::atomic<std::mutex *> fMutex { nullptr };

    /** All initialized UMutexes are kept in a linked list, so that they can be found,
     * and the underlying std::mutex destructed, by u_cleanup().
     */
    UMutex *fListLink { nullptr };
    static UMutex *gListHead;

    /** Out-of-line function to lazily initialize a UMutex on first use.
     * Initial fast check is inline, in lock().  The returned value may never
     * be nullptr.
     */
    std::mutex *getMutex();
};


/* Lock a mutex.
 * @param mutex The given mutex to be locked.  Pass NULL to specify
 *              the global ICU mutex.  Recursive locks are an error
 *              and may cause a deadlock on some platforms.
 */
U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex);

/* Unlock a mutex.
 * @param mutex The given mutex to be unlocked.  Pass NULL to specify
 *              the global ICU mutex.
 */
U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex);


U_NAMESPACE_END



U_NAMESPACE_BEGIN

/**
  * Mutex is a helper class for convenient locking and unlocking of a UMutex.
  *
  * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex
  * goes out of scope.
  *
  *  If no UMutex is specified, the ICU global mutex is implied.
  *
  *  For example:
  *
  *  static UMutex myMutex;
  *
  *  void Function(int arg1, int arg2)
  *  {
  *     static Object* foo;      // Shared read-write object
  *     Mutex mutex(&myMutex);   // or no args for the global lock
  *     foo->Method();
  *     // When 'mutex' goes out of scope and gets destroyed here, the lock is released
  *  }
  *
  *  Note:  Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
  *         returning a Mutex. This is a common mistake which silently slips through the
  *         compiler!!
  */

class U_COMMON_API Mutex : public UMemory {
public:
    Mutex(UMutex *mutex = nullptr) : fMutex(mutex) {
        umtx_lock(fMutex);
    }
    ~Mutex() {
        umtx_unlock(fMutex);
    }

    Mutex(const Mutex &other) = delete; // forbid assigning of this class
    Mutex &operator=(const Mutex &other) = delete; // forbid copying of this class
    void *operator new(size_t s) = delete;  // forbid heap allocation. Locals only.

private:
    UMutex   *fMutex;
};


U_NAMESPACE_END


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  udataswp.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003jun05
*   created by: Markus W. Scherer
*
*   Definitions for ICU data transformations for different platforms,
*   changing between big- and little-endian data and/or between
*   charset families (ASCII<->EBCDIC).
*/


#include <stdarg.h>


/* forward declaration */

U_CDECL_BEGIN

struct UDataSwapper;
typedef struct UDataSwapper UDataSwapper;

/**
 * Function type for data transformation.
 * Transforms data, or just returns the length of the data if
 * the input length is -1.
 * Swap functions assume that their data pointers are aligned properly.
 *
 * Quick implementation outline:
 * (best to copy and adapt and existing swapper implementation)
 * check that the data looks like the expected format
 * if(length<0) {
 *   preflight:
 *   never dereference outData
 *   read inData and determine the data size
 *   assume that inData is long enough for this
 * } else {
 *   outData can be NULL if length==0
 *   inData==outData (in-place swapping) possible but not required!
 *   verify that length>=(actual size)
 *   if there is a chance that not every byte up to size is reached
 *     due to padding etc.:
 *   if(inData!=outData) {
 *     memcpy(outData, inData, actual size);
 *   }
 *   swap contents
 * }
 * return actual size
 *
 * Further implementation notes:
 * - read integers from inData before swapping them
 *   because in-place swapping can make them unreadable
 * - compareInvChars compares a local Unicode string with already-swapped
 *   output charset strings
 *
 * @param ds Pointer to UDataSwapper containing global data about the
 *           transformation and function pointers for handling primitive
 *           types.
 * @param inData Pointer to the input data to be transformed or examined.
 * @param length Length of the data, counting bytes. May be -1 for preflighting.
 *               If length>=0, then transform the data.
 *               If length==-1, then only determine the length of the data.
 *               The length cannot be determined from the data itself for all
 *               types of data (e.g., not for simple arrays of integers).
 * @param outData Pointer to the output data buffer.
 *                If length>=0 (transformation), then the output buffer must
 *                have a capacity of at least length.
 *                If length==-1, then outData will not be used and can be NULL.
 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
 *                   fulfill U_SUCCESS on input.
 * @return The actual length of the data.
 *
 * @see UDataSwapper
 * @internal ICU 2.8
 */
typedef int32_t U_CALLCONV
UDataSwapFn(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode);

/**
 * Convert one uint16_t from input to platform endianness.
 * @internal ICU 2.8
 */
typedef uint16_t U_CALLCONV
UDataReadUInt16(uint16_t x);

/**
 * Convert one uint32_t from input to platform endianness.
 * @internal ICU 2.8
 */
typedef uint32_t U_CALLCONV
UDataReadUInt32(uint32_t x);

/**
 * Convert one uint16_t from platform to input endianness.
 * @internal ICU 2.8
 */
typedef void U_CALLCONV
UDataWriteUInt16(uint16_t *p, uint16_t x);

/**
 * Convert one uint32_t from platform to input endianness.
 * @internal ICU 2.8
 */
typedef void U_CALLCONV
UDataWriteUInt32(uint32_t *p, uint32_t x);

/**
 * Compare invariant-character strings, one in the output data and the
 * other one caller-provided in Unicode.
 * An output data string is compared because strings are usually swapped
 * before the rest of the data, to allow for sorting of string tables
 * according to the output charset.
 * You can use -1 for the length parameters of NUL-terminated strings as usual.
 * Returns Unicode code point order for invariant characters.
 * @internal ICU 2.8
 */
typedef int32_t U_CALLCONV
UDataCompareInvChars(const UDataSwapper *ds,
                     const char *outString, int32_t outLength,
                     const UChar *localString, int32_t localLength);

/**
 * Function for message output when an error occurs during data swapping.
 * A format string and variable number of arguments are passed
 * like for vprintf().
 *
 * @param context A function-specific context pointer.
 * @param fmt The format string.
 * @param args The arguments for format string inserts.
 *
 * @internal ICU 2.8
 */
typedef void U_CALLCONV
UDataPrintError(void *context, const char *fmt, va_list args);

struct UDataSwapper {
    /** Input endianness. @internal ICU 2.8 */
    UBool inIsBigEndian;
    /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
    uint8_t inCharset;
    /** Output endianness. @internal ICU 2.8 */
    UBool outIsBigEndian;
    /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
    uint8_t outCharset;

    /* basic functions for reading data values */

    /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
    UDataReadUInt16 *readUInt16;
    /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
    UDataReadUInt32 *readUInt32;
    /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
    UDataCompareInvChars *compareInvChars;

    /* basic functions for writing data values */

    /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
    UDataWriteUInt16 *writeUInt16;
    /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
    UDataWriteUInt32 *writeUInt32;

    /* basic functions for data transformations */

    /** Transform an array of 16-bit integers. @internal ICU 2.8 */
    UDataSwapFn *swapArray16;
    /** Transform an array of 32-bit integers. @internal ICU 2.8 */
    UDataSwapFn *swapArray32;
    /** Transform an array of 64-bit integers. @internal ICU 53 */
    UDataSwapFn *swapArray64;
    /** Transform an invariant-character string. @internal ICU 2.8 */
    UDataSwapFn *swapInvChars;

    /**
     * Function for message output when an error occurs during data swapping.
     * Can be NULL.
     * @internal ICU 2.8
     */
    UDataPrintError *printError;
    /** Context pointer for printError. @internal ICU 2.8 */
    void *printErrorContext;
};

U_CDECL_END

U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
                  UBool outIsBigEndian, uint8_t outCharset,
                  UErrorCode *pErrorCode);

/**
 * Open a UDataSwapper for the given input data and the specified output
 * characteristics.
 * Values of -1 for any of the characteristics mean the local platform's
 * characteristics.
 *
 * @see udata_swap
 * @internal ICU 2.8
 */
U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapperForInputData(const void *data, int32_t length,
                              UBool outIsBigEndian, uint8_t outCharset,
                              UErrorCode *pErrorCode);

U_CAPI void U_EXPORT2
udata_closeSwapper(UDataSwapper *ds);

/**
 * Read the beginning of an ICU data piece, recognize magic bytes,
 * swap the structure.
 * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
 *
 * @return The size of the data header, in bytes.
 *
 * @internal ICU 2.8
 */
U_CAPI int32_t U_EXPORT2
udata_swapDataHeader(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode);

/**
 * Convert one int16_t from input to platform endianness.
 * @internal ICU 2.8
 */
U_CAPI int16_t U_EXPORT2
udata_readInt16(const UDataSwapper *ds, int16_t x);

/**
 * Convert one int32_t from input to platform endianness.
 * @internal ICU 2.8
 */
U_CAPI int32_t U_EXPORT2
udata_readInt32(const UDataSwapper *ds, int32_t x);

/**
 * Swap a block of invariant, NUL-terminated strings, but not padding
 * bytes after the last string.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
udata_swapInvStringBlock(const UDataSwapper *ds,
                         const void *inData, int32_t length, void *outData,
                         UErrorCode *pErrorCode);

U_CAPI void U_EXPORT2
udata_printError(const UDataSwapper *ds,
                 const char *fmt,
                 ...);

/* internal exports from putil.c -------------------------------------------- */

/* declared here to keep them out of the public putil.h */

/**
 * Swap invariant char * strings ASCII->EBCDIC.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode);

/**
 * Copy invariant ASCII char * strings and verify they are invariant.
 * @internal
 */
U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper *ds,
               const void *inData, int32_t length, void *outData,
               UErrorCode *pErrorCode);

/**
 * Swap invariant char * strings EBCDIC->ASCII.
 * @internal
 */
U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode);

/**
 * Copy invariant EBCDIC char * strings and verify they are invariant.
 * @internal
 */
U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode);

/**
 * Compare ASCII invariant char * with Unicode invariant UChar *
 * @internal
 */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
                     const char *outString, int32_t outLength,
                     const UChar *localString, int32_t localLength);

/**
 * Compare EBCDIC invariant char * with Unicode invariant UChar *
 * @internal
 */
U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper *ds,
                      const char *outString, int32_t outLength,
                      const UChar *localString, int32_t localLength);

/**
 * \def uprv_compareInvWithUChar
 * Compare an invariant-character strings with a UChar string
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvAscii
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvEbcdic
#else
#   error Unknown charset family!
#endif

// utrie_swap.cpp -----------------------------------------------------------***

/**
 * Swaps a serialized UTrie.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode);

/**
 * Swaps a serialized UTrie2.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode);

/**
 * Swaps a serialized UCPTrie.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper *ds,
             const void *inData, int32_t length, void *outData,
             UErrorCode *pErrorCode);

/**
 * Swaps a serialized UTrie, UTrie2, or UCPTrie.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode);

/* material... -------------------------------------------------------------- */

#if 0

/* udata.h */

/**
 * Public API function in udata.c
 *
 * Same as udata_openChoice() but automatically swaps the data.
 * isAcceptable, if not NULL, may accept data with endianness and charset family
 * different from the current platform's properties.
 * If the data is acceptable and the platform properties do not match, then
 * the swap function is called to swap an allocated version of the data.
 * Preflighting may or may not be performed depending on whether the size of
 * the loaded data item is known.
 *
 * @param isAcceptable Same as for udata_openChoice(). May be NULL.
 *
 * @internal ICU 2.8
 */
U_CAPI UDataMemory * U_EXPORT2
udata_openSwap(const char *path, const char *type, const char *name,
               UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
               UDataSwapFn *swap,
               UDataPrintError *printError, void *printErrorContext,
               UErrorCode *pErrorCode);

#endif


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2004-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uset_imp.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004sep07
*   created by: Markus W. Scherer
*
*   Internal USet definitions.
*/





U_CDECL_BEGIN

typedef void U_CALLCONV
USetAdd(USet *set, UChar32 c);

typedef void U_CALLCONV
USetAddRange(USet *set, UChar32 start, UChar32 end);

typedef void U_CALLCONV
USetAddString(USet *set, const UChar *str, int32_t length);

typedef void U_CALLCONV
USetRemove(USet *set, UChar32 c);

typedef void U_CALLCONV
USetRemoveRange(USet *set, UChar32 start, UChar32 end);

/**
 * Interface for adding items to a USet, to keep low-level code from
 * statically depending on the USet implementation.
 * Calls will look like sa->add(sa->set, c);
 */
struct USetAdder {
    USet *set;
    USetAdd *add;
    USetAddRange *addRange;
    USetAddString *addString;
    USetRemove *remove;
    USetRemoveRange *removeRange;
};
typedef struct USetAdder USetAdder;

U_CDECL_END



// When the nfc.nrm data is *not* hardcoded into the common library
// (with this constant set to 0),
// then it needs to be built into the data package:
// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT
#define NORM2_HARDCODE_NFC_DATA 1

U_NAMESPACE_BEGIN

struct CanonIterData;

class ByteSink;
class Edits;
class InitCanonIterData;
class LcccContext;

class U_COMMON_API Hangul {
public:
    /* Korean Hangul and Jamo constants */
    enum {
        JAMO_L_BASE=0x1100,     /* "lead" jamo */
        JAMO_L_END=0x1112,
        JAMO_V_BASE=0x1161,     /* "vowel" jamo */
        JAMO_V_END=0x1175,
        JAMO_T_BASE=0x11a7,     /* "trail" jamo */
        JAMO_T_END=0x11c2,

        HANGUL_BASE=0xac00,
        HANGUL_END=0xd7a3,

        JAMO_L_COUNT=19,
        JAMO_V_COUNT=21,
        JAMO_T_COUNT=28,

        JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,

        HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
        HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
    };

    static inline UBool isHangul(UChar32 c) {
        return HANGUL_BASE<=c && c<HANGUL_LIMIT;
    }
    static inline UBool
    isHangulLV(UChar32 c) {
        c-=HANGUL_BASE;
        return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
    }
    static inline UBool isJamoL(UChar32 c) {
        return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
    }
    static inline UBool isJamoV(UChar32 c) {
        return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
    }
    static inline UBool isJamoT(UChar32 c) {
        int32_t t=c-JAMO_T_BASE;
        return 0<t && t<JAMO_T_COUNT;  // not JAMO_T_BASE itself
    }
    static UBool isJamo(UChar32 c) {
        return JAMO_L_BASE<=c && c<=JAMO_T_END &&
            (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
    }

    /**
     * Decomposes c, which must be a Hangul syllable, into buffer
     * and returns the length of the decomposition (2 or 3).
     */
    static inline int32_t decompose(UChar32 c, UChar buffer[3]) {
        c-=HANGUL_BASE;
        UChar32 c2=c%JAMO_T_COUNT;
        c/=JAMO_T_COUNT;
        buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
        buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
        if(c2==0) {
            return 2;
        } else {
            buffer[2]=(UChar)(JAMO_T_BASE+c2);
            return 3;
        }
    }

    /**
     * Decomposes c, which must be a Hangul syllable, into buffer.
     * This is the raw, not recursive, decomposition. Its length is always 2.
     */
    static inline void getRawDecomposition(UChar32 c, UChar buffer[2]) {
        UChar32 orig=c;
        c-=HANGUL_BASE;
        UChar32 c2=c%JAMO_T_COUNT;
        if(c2==0) {
            c/=JAMO_T_COUNT;
            buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
            buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
        } else {
            buffer[0]=(UChar)(orig-c2);  // LV syllable
            buffer[1]=(UChar)(JAMO_T_BASE+c2);
        }
    }
private:
    Hangul();  // no instantiation
};

class Normalizer2Impl;

class U_COMMON_API ReorderingBuffer : public UMemory {
public:
    /** Constructs only; init() should be called. */
    ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
        impl(ni), str(dest),
        start(NULL), reorderStart(NULL), limit(NULL),
        remainingCapacity(0), lastCC(0) {}
    /** Constructs, removes the string contents, and initializes for a small initial capacity. */
    ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
    ~ReorderingBuffer() {
        if(start!=NULL) {
            str.releaseBuffer((int32_t)(limit-start));
        }
    }
    UBool init(int32_t destCapacity, UErrorCode &errorCode);

    UBool isEmpty() const { return start==limit; }
    int32_t length() const { return (int32_t)(limit-start); }
    UChar *getStart() { return start; }
    UChar *getLimit() { return limit; }
    uint8_t getLastCC() const { return lastCC; }

    UBool equals(const UChar *start, const UChar *limit) const;
    UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;

    UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
        return (c<=0xffff) ?
            appendBMP((UChar)c, cc, errorCode) :
            appendSupplementary(c, cc, errorCode);
    }
    UBool append(const UChar *s, int32_t length, UBool isNFD,
                 uint8_t leadCC, uint8_t trailCC,
                 UErrorCode &errorCode);
    UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
        if(remainingCapacity==0 && !resize(1, errorCode)) {
            return FALSE;
        }
        if(lastCC<=cc || cc==0) {
            *limit++=c;
            lastCC=cc;
            if(cc<=1) {
                reorderStart=limit;
            }
        } else {
            insert(c, cc);
        }
        --remainingCapacity;
        return TRUE;
    }
    UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
    UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
    void remove();
    void removeSuffix(int32_t suffixLength);
    void setReorderingLimit(UChar *newLimit) {
        remainingCapacity+=(int32_t)(limit-newLimit);
        reorderStart=limit=newLimit;
        lastCC=0;
    }
    void copyReorderableSuffixTo(UnicodeString &s) const {
        s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart));
    }
private:
    /*
     * TODO: Revisit whether it makes sense to track reorderStart.
     * It is set to after the last known character with cc<=1,
     * which stops previousCC() before it reads that character and looks up its cc.
     * previousCC() is normally only called from insert().
     * In other words, reorderStart speeds up the insertion of a combining mark
     * into a multi-combining mark sequence where it does not belong at the end.
     * This might not be worth the trouble.
     * On the other hand, it's not a huge amount of trouble.
     *
     * We probably need it for UNORM_SIMPLE_APPEND.
     */

    UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
    void insert(UChar32 c, uint8_t cc);
    static void writeCodePoint(UChar *p, UChar32 c) {
        if(c<=0xffff) {
            *p=(UChar)c;
        } else {
            p[0]=U16_LEAD(c);
            p[1]=U16_TRAIL(c);
        }
    }
    UBool resize(int32_t appendLength, UErrorCode &errorCode);

    const Normalizer2Impl &impl;
    UnicodeString &str;
    UChar *start, *reorderStart, *limit;
    int32_t remainingCapacity;
    uint8_t lastCC;

    // private backward iterator
    void setIterator() { codePointStart=limit; }
    void skipPrevious();  // Requires start<codePointStart.
    uint8_t previousCC();  // Returns 0 if there is no previous character.

    UChar *codePointStart, *codePointLimit;
};

/**
 * Low-level implementation of the Unicode Normalization Algorithm.
 * For the data structure and details see the documentation at the end of
 * this normalizer2impl.h and in the design doc at
 * http://site.icu-project.org/design/normalization/custom
 */
class U_COMMON_API Normalizer2Impl : public UObject {
public:
    Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { }
    virtual ~Normalizer2Impl();

    void init(const int32_t *inIndexes, const UCPTrie *inTrie,
              const uint16_t *inExtraData, const uint8_t *inSmallFCD);

    void addLcccChars(UnicodeSet &set) const;
    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
    void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;

    // low-level properties ------------------------------------------------ ***

    UBool ensureCanonIterData(UErrorCode &errorCode) const;

    // The trie stores values for lead surrogate code *units*.
    // Surrogate code *points* are inert.
    uint16_t getNorm16(UChar32 c) const {
        return U_IS_LEAD(c) ?
            static_cast<uint16_t>(INERT) :
            UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
    }
    uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }

    UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
        if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
            return UNORM_YES;
        } else if(minMaybeYes<=norm16) {
            return UNORM_MAYBE;
        } else {
            return UNORM_NO;
        }
    }
    UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }
    UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
    UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }

    uint8_t getCC(uint16_t norm16) const {
        if(norm16>=MIN_NORMAL_MAYBE_YES) {
            return getCCFromNormalYesOrMaybe(norm16);
        }
        if(norm16<minNoNo || limitNoNo<=norm16) {
            return 0;
        }
        return getCCFromNoNo(norm16);
    }
    static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
        return (uint8_t)(norm16 >> OFFSET_SHIFT);
    }
    static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
        return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
    }
    uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
        if (c < minCompNoMaybeCP) { return 0; }
        return getCCFromYesOrMaybe(getNorm16(c));
    }

    /**
     * Returns the FCD data for code point c.
     * @param c A Unicode code point.
     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
     */
    uint16_t getFCD16(UChar32 c) const {
        if(c<minDecompNoCP) {
            return 0;
        } else if(c<=0xffff) {
            if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
        }
        return getFCD16FromNormData(c);
    }
    /**
     * Returns the FCD data for the next code point (post-increment).
     * Might skip only a lead surrogate rather than the whole surrogate pair if none of
     * the supplementary code points associated with the lead surrogate have non-zero FCD data.
     * @param s A valid pointer into a string. Requires s!=limit.
     * @param limit The end of the string, or NULL.
     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
     */
    uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
        UChar32 c=*s++;
        if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
            return 0;
        }
        UChar c2;
        if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {
            c=U16_GET_SUPPLEMENTARY(c, c2);
            ++s;
        }
        return getFCD16FromNormData(c);
    }
    /**
     * Returns the FCD data for the previous code point (pre-decrement).
     * @param start The start of the string.
     * @param s A valid pointer into a string. Requires start<s.
     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
     */
    uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
        UChar32 c=*--s;
        if(c<minDecompNoCP) {
            return 0;
        }
        if(!U16_IS_TRAIL(c)) {
            if(!singleLeadMightHaveNonZeroFCD16(c)) {
                return 0;
            }
        } else {
            UChar c2;
            if(start<s && U16_IS_LEAD(c2=*(s-1))) {
                c=U16_GET_SUPPLEMENTARY(c2, c);
                --s;
            }
        }
        return getFCD16FromNormData(c);
    }

    /** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
    UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
        // 0<=lead<=0xffff
        uint8_t bits=smallFCD[lead>>8];
        if(bits==0) { return false; }
        return (UBool)((bits>>((lead>>5)&7))&1);
    }
    /** Returns the FCD value from the regular normalization data. */
    uint16_t getFCD16FromNormData(UChar32 c) const;

    /**
     * Gets the decomposition for one code point.
     * @param c code point
     * @param buffer out-only buffer for algorithmic decompositions
     * @param length out-only, takes the length of the decomposition, if any
     * @return pointer to the decomposition, or NULL if none
     */
    const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const;

    /**
     * Gets the raw decomposition for one code point.
     * @param c code point
     * @param buffer out-only buffer for algorithmic decompositions
     * @param length out-only, takes the length of the decomposition, if any
     * @return pointer to the decomposition, or NULL if none
     */
    const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const;

    UChar32 composePair(UChar32 a, UChar32 b) const;

    UBool isCanonSegmentStarter(UChar32 c) const;
    UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;

    enum {
        // Fixed norm16 values.
        MIN_YES_YES_WITH_CC=0xfe02,
        JAMO_VT=0xfe00,
        MIN_NORMAL_MAYBE_YES=0xfc00,
        JAMO_L=2,  // offset=1 hasCompBoundaryAfter=FALSE
        INERT=1,  // offset=0 hasCompBoundaryAfter=TRUE

        // norm16 bit 0 is comp-boundary-after.
        HAS_COMP_BOUNDARY_AFTER=1,
        OFFSET_SHIFT=1,

        // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
        // tccc (0, 1, >1) for quick FCC boundary-after tests.
        DELTA_TCCC_0=0,
        DELTA_TCCC_1=2,
        DELTA_TCCC_GT_1=4,
        DELTA_TCCC_MASK=6,
        DELTA_SHIFT=3,

        MAX_DELTA=0x40
    };

    enum {
        // Byte offsets from the start of the data, after the generic header.
        IX_NORM_TRIE_OFFSET,
        IX_EXTRA_DATA_OFFSET,
        IX_SMALL_FCD_OFFSET,
        IX_RESERVED3_OFFSET,
        IX_RESERVED4_OFFSET,
        IX_RESERVED5_OFFSET,
        IX_RESERVED6_OFFSET,
        IX_TOTAL_SIZE,

        // Code point thresholds for quick check codes.
        IX_MIN_DECOMP_NO_CP,
        IX_MIN_COMP_NO_MAYBE_CP,

        // Norm16 value thresholds for quick check combinations and types of extra data.

        /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
        IX_MIN_YES_NO,
        /** Mappings are comp-normalized. */
        IX_MIN_NO_NO,
        IX_LIMIT_NO_NO,
        IX_MIN_MAYBE_YES,

        /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
        IX_MIN_YES_NO_MAPPINGS_ONLY,
        /** Mappings are not comp-normalized but have a comp boundary before. */
        IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
        /** Mappings do not have a comp boundary before. */
        IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
        /** Mappings to the empty string. */
        IX_MIN_NO_NO_EMPTY,

        IX_MIN_LCCC_CP,
        IX_RESERVED19,
        IX_COUNT
    };

    enum {
        MAPPING_HAS_CCC_LCCC_WORD=0x80,
        MAPPING_HAS_RAW_MAPPING=0x40,
        // unused bit 0x20,
        MAPPING_LENGTH_MASK=0x1f
    };

    enum {
        COMP_1_LAST_TUPLE=0x8000,
        COMP_1_TRIPLE=1,
        COMP_1_TRAIL_LIMIT=0x3400,
        COMP_1_TRAIL_MASK=0x7ffe,
        COMP_1_TRAIL_SHIFT=9,  // 10-1 for the "triple" bit
        COMP_2_TRAIL_SHIFT=6,
        COMP_2_TRAIL_MASK=0xffc0
    };

    // higher-level functionality ------------------------------------------ ***

    // NFD without an NFD Normalizer2 instance.
    UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
                             UErrorCode &errorCode) const;
    /**
     * Decomposes [src, limit[ and writes the result to dest.
     * limit can be NULL if src is NUL-terminated.
     * destLengthEstimate is the initial dest buffer capacity and can be -1.
     */
    void decompose(const UChar *src, const UChar *limit,
                   UnicodeString &dest, int32_t destLengthEstimate,
                   UErrorCode &errorCode) const;

    const UChar *decompose(const UChar *src, const UChar *limit,
                           ReorderingBuffer *buffer, UErrorCode &errorCode) const;
    void decomposeAndAppend(const UChar *src, const UChar *limit,
                            UBool doDecompose,
                            UnicodeString &safeMiddle,
                            ReorderingBuffer &buffer,
                            UErrorCode &errorCode) const;
    UBool compose(const UChar *src, const UChar *limit,
                  UBool onlyContiguous,
                  UBool doCompose,
                  ReorderingBuffer &buffer,
                  UErrorCode &errorCode) const;
    const UChar *composeQuickCheck(const UChar *src, const UChar *limit,
                                   UBool onlyContiguous,
                                   UNormalizationCheckResult *pQCResult) const;
    void composeAndAppend(const UChar *src, const UChar *limit,
                          UBool doCompose,
                          UBool onlyContiguous,
                          UnicodeString &safeMiddle,
                          ReorderingBuffer &buffer,
                          UErrorCode &errorCode) const;

    /** sink==nullptr: isNormalized() */
    UBool composeUTF8(uint32_t options, UBool onlyContiguous,
                      const uint8_t *src, const uint8_t *limit,
                      ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;

    const UChar *makeFCD(const UChar *src, const UChar *limit,
                         ReorderingBuffer *buffer, UErrorCode &errorCode) const;
    void makeFCDAndAppend(const UChar *src, const UChar *limit,
                          UBool doMakeFCD,
                          UnicodeString &safeMiddle,
                          ReorderingBuffer &buffer,
                          UErrorCode &errorCode) const;

    UBool hasDecompBoundaryBefore(UChar32 c) const;
    UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
    UBool hasDecompBoundaryAfter(UChar32 c) const;
    UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
    UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }

    UBool hasCompBoundaryBefore(UChar32 c) const {
        return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
    }
    UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
        return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
    }
    UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
        uint16_t norm16=getNorm16(c);
        return isCompYesAndZeroCC(norm16) &&
            (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
            (!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
    }

    UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
    UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
    UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
private:
    friend class InitCanonIterData;
    friend class LcccContext;

    UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
    UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
    static UBool isInert(uint16_t norm16) { return norm16==INERT; }
    static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
    static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
    uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
    UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
    UBool isHangulLVT(uint16_t norm16) const {
        return norm16==hangulLVT();
    }
    UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
    // UBool isCompYes(uint16_t norm16) const {
    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
    // }
    // UBool isCompYesOrMaybe(uint16_t norm16) const {
    //     return norm16<minNoNo || minMaybeYes<=norm16;
    // }
    // UBool hasZeroCCFromDecompYes(uint16_t norm16) const {
    //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
    // }
    UBool isDecompYesAndZeroCC(uint16_t norm16) const {
        return norm16<minYesNo ||
               norm16==JAMO_VT ||
               (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
    }
    /**
     * A little faster and simpler than isDecompYesAndZeroCC() but does not include
     * the MaybeYes which combine-forward and have ccc=0.
     * (Standard Unicode 10 normalization does not have such characters.)
     */
    UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
        return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
    }
    UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; }

    // For use with isCompYes().
    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
    // static uint8_t getCCFromYes(uint16_t norm16) {
    //     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
    // }
    uint8_t getCCFromNoNo(uint16_t norm16) const {
        const uint16_t *mapping=getMapping(norm16);
        if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) {
            return (uint8_t)*(mapping-1);
        } else {
            return 0;
        }
    }
    // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
    uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
        if(norm16<=minYesNo) {
            return 0;  // yesYes and Hangul LV have ccc=tccc=0
        } else {
            // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
            return (uint8_t)(*getMapping(norm16)>>8);  // tccc from yesNo
        }
    }
    uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
    uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;

    // Requires algorithmic-NoNo.
    UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
        return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
    }
    UChar32 getAlgorithmicDelta(uint16_t norm16) const {
        return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
    }

    // Requires minYesNo<norm16<limitNoNo.
    const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
    const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
        if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
            return NULL;
        } else if(norm16<minMaybeYes) {
            return getMapping(norm16);  // for yesYes; if Jamo L: harmless empty list
        } else {
            return maybeYesCompositions+norm16-minMaybeYes;
        }
    }
    const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
        // A composite has both mapping & compositions list.
        const uint16_t *list=getMapping(norm16);
        return list+  // mapping pointer
            1+  // +1 to skip the first unit with the mapping length
            (*list&MAPPING_LENGTH_MASK);  // + mapping length
    }
    const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
        // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
        return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
    }
    /**
     * @param c code point must have compositions
     * @return compositions list pointer
     */
    const uint16_t *getCompositionsList(uint16_t norm16) const {
        return isDecompYes(norm16) ?
                getCompositionsListForDecompYes(norm16) :
                getCompositionsListForComposite(norm16);
    }

    const UChar *copyLowPrefixFromNulTerminated(const UChar *src,
                                                UChar32 minNeedDataCP,
                                                ReorderingBuffer *buffer,
                                                UErrorCode &errorCode) const;
    const UChar *decomposeShort(const UChar *src, const UChar *limit,
                                UBool stopAtCompBoundary, UBool onlyContiguous,
                                ReorderingBuffer &buffer, UErrorCode &errorCode) const;
    UBool decompose(UChar32 c, uint16_t norm16,
                    ReorderingBuffer &buffer, UErrorCode &errorCode) const;

    const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
                                  UBool stopAtCompBoundary, UBool onlyContiguous,
                                  ReorderingBuffer &buffer, UErrorCode &errorCode) const;

    static int32_t combine(const uint16_t *list, UChar32 trail);
    void addComposites(const uint16_t *list, UnicodeSet &set) const;
    void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
                   UBool onlyContiguous) const;

    UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
        return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
    }
    UBool norm16HasCompBoundaryBefore(uint16_t norm16) const  {
        return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
    }
    UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
    UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
    UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
                               UBool onlyContiguous) const;
    UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
                               UBool onlyContiguous) const;
    UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
        return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
            (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
    }
    /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
    UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
        return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
            (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff);
    }

    const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
    const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;

    const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
    const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;

    void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
                                     CanonIterData &newData, UErrorCode &errorCode) const;

    int32_t getCanonValue(UChar32 c) const;
    const UnicodeSet &getCanonStartSet(int32_t n) const;

    // UVersionInfo dataVersion;

    // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
    UChar minDecompNoCP;
    UChar minCompNoMaybeCP;
    UChar minLcccCP;

    // Norm16 value thresholds for quick check combinations and types of extra data.
    uint16_t minYesNo;
    uint16_t minYesNoMappingsOnly;
    uint16_t minNoNo;
    uint16_t minNoNoCompBoundaryBefore;
    uint16_t minNoNoCompNoMaybeCC;
    uint16_t minNoNoEmpty;
    uint16_t limitNoNo;
    uint16_t centerNoNoDelta;
    uint16_t minMaybeYes;

    const UCPTrie *normTrie;
    const uint16_t *maybeYesCompositions;
    const uint16_t *extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
    const uint8_t *smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0

    UInitOnce       fCanonIterDataInitOnce = U_INITONCE_INITIALIZER;
    CanonIterData  *fCanonIterData;
};

// bits in canonIterData
#define CANON_NOT_SEGMENT_STARTER 0x80000000
#define CANON_HAS_COMPOSITIONS 0x40000000
#define CANON_HAS_SET 0x200000
#define CANON_VALUE_MASK 0x1fffff

/**
 * ICU-internal shortcut for quick access to standard Unicode normalization.
 */
class U_COMMON_API Normalizer2Factory {
public:
    static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
    static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
    static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);

    static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);

    static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);
    static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
    static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);

    // Get the Impl instance of the Normalizer2.
    // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
    static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
private:
    Normalizer2Factory();  // No instantiation.
};

U_NAMESPACE_END

U_CAPI int32_t U_EXPORT2
unorm2_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode);

/**
 * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
 * @internal
 */
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);

/**
 * Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
 * @internal
 */
U_CFUNC uint16_t
unorm_getFCD16(UChar32 c);

/**
 * Format of Normalizer2 .nrm data files.
 * Format version 4.0.
 *
 * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
 * ICU ships with data files for standard Unicode Normalization Forms
 * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
 * Custom (application-specific) data can be built into additional .nrm files
 * with the gennorm2 build tool.
 * ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
 *
 * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
 * cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
 *
 * A .nrm file begins with a standard ICU data file header
 * (DataHeader, see ucmndata.h and unicode/udata.h).
 * The UDataInfo.dataVersion field usually contains the Unicode version
 * for which the data was generated.
 *
 * After the header, the file contains the following parts.
 * Constants are defined as enum values of the Normalizer2Impl class.
 *
 * Many details of the data structures are described in the design doc
 * which is at http://site.icu-project.org/design/normalization/custom
 *
 * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
 *
 *      The first eight indexes are byte offsets in ascending order.
 *      Each byte offset marks the start of the next part in the data file,
 *      and the end of the previous one.
 *      When two consecutive byte offsets are the same, then the corresponding part is empty.
 *      Byte offsets are offsets from after the header,
 *      that is, from the beginning of the indexes[].
 *      Each part starts at an offset with proper alignment for its data.
 *      If necessary, the previous part may include padding bytes to achieve this alignment.
 *
 *      minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP] is the lowest code point
 *      with a decomposition mapping, that is, with NF*D_QC=No.
 *      minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
 *      with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
 *      minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
 *      is the lowest code point with lccc!=0.
 *
 *      The next eight indexes are thresholds of 16-bit trie values for ranges of
 *      values indicating multiple normalization properties.
 *      They are listed here in threshold order, not in the order they are stored in the indexes.
 *          minYesNo=indexes[IX_MIN_YES_NO];
 *          minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
 *          minNoNo=indexes[IX_MIN_NO_NO];
 *          minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
 *          minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
 *          minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
 *          limitNoNo=indexes[IX_LIMIT_NO_NO];
 *          minMaybeYes=indexes[IX_MIN_MAYBE_YES];
 *      See the normTrie description below and the design doc for details.
 *
 * UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie
 *
 *      The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
 *      Rather than using independent bits in the value (which would require more than 16 bits),
 *      information is extracted primarily via range checks.
 *      Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
 *      For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
 *      means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
 *      which means it has a two-way (round-trip) decomposition mapping.
 *      Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
 *      pointing to mappings, compositions lists, or both.
 *      Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
 *      means that the character is normalization-inert, that is,
 *      it does not have a mapping, does not participate in composition, has a zero
 *      canonical combining class, and forms a boundary where text before it and after it
 *      can be normalized independently.
 *      For details about how multiple properties are encoded in 16-bit values
 *      see the design doc.
 *      Note that the encoding cannot express all combinations of the properties involved;
 *      it only supports those combinations that are allowed by
 *      the Unicode Normalization algorithms. Details are in the design doc as well.
 *      The gennorm2 tool only builds .nrm files for data that conforms to the limitations.
 *
 *      The trie has a value for each lead surrogate code unit representing the "worst case"
 *      properties of the 1024 supplementary characters whose UTF-16 form starts with
 *      the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
 *      then their lead surrogate code unit has the trie value INERT.
 *      When the lead surrogate unit's value exceeds the quick check minimum during processing,
 *      the properties for the full supplementary code point need to be looked up.
 *
 * uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes];
 * uint16_t extraData[];
 *
 *      There is only one byte offset for the end of these two arrays.
 *      The split between them is given by the constant and variable mentioned above.
 *      In version 3, the difference must be shifted right by OFFSET_SHIFT.
 *
 *      The maybeYesCompositions array contains compositions lists for characters that
 *      combine both forward (as starters in composition pairs)
 *      and backward (as trailing characters in composition pairs).
 *      Such characters do not occur in Unicode 5.2 but are allowed by
 *      the Unicode Normalization algorithms.
 *      If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES
 *      and the maybeYesCompositions array is empty.
 *      If there are such characters, then minMaybeYes is subtracted from their norm16 values
 *      to get the index into this array.
 *
 *      The extraData array contains compositions lists for "YesYes" characters,
 *      followed by mappings and optional compositions lists for "YesNo" characters,
 *      followed by only mappings for "NoNo" characters.
 *      (Referring to pairs of NFC/NFD quick check values.)
 *      The norm16 values of those characters are directly indexes into the extraData array.
 *      In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
 *      for accessing extraData.
 *
 *      The data structures for compositions lists and mappings are described in the design doc.
 *
 * uint8_t smallFCD[0x100]; -- new in format version 2
 *
 *      This is a bit set to help speed up FCD value lookups in the absence of a full
 *      UTrie2 or other large data structure with the full FCD value mapping.
 *
 *      Each smallFCD bit is set if any of the corresponding 32 BMP code points
 *      has a non-zero FCD value (lccc!=0 or tccc!=0).
 *      Bit 0 of smallFCD[0] is for U+0000..U+001F. Bit 7 of smallFCD[0xff] is for U+FFE0..U+FFFF.
 *      A bit for 32 lead surrogates is set if any of the 32k corresponding
 *      _supplementary_ code points has a non-zero FCD value.
 *
 *      This bit set is most useful for the large blocks of CJK characters with FCD=0.
 *
 * Changes from format version 1 to format version 2 ---------------------------
 *
 * - Addition of data for raw (not recursively decomposed) mappings.
 *   + The MAPPING_NO_COMP_BOUNDARY_AFTER bit in the extraData is now also set when
 *     the mapping is to an empty string or when the character combines-forward.
 *     This subsumes the one actual use of the MAPPING_PLUS_COMPOSITION_LIST bit which
 *     is then repurposed for the MAPPING_HAS_RAW_MAPPING bit.
 *   + For details see the design doc.
 * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into
 *   distinct ranges (combines-forward vs. not)
 *   so that a range check can be used to find out if there is a compositions list.
 *   This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
 *   It is needed for the new (in ICU 49) composePair(), not for other normalization.
 * - Addition of the smallFCD[] bit set.
 *
 * Changes from format version 2 to format version 3 (ICU 60) ------------------
 *
 * - norm16 bit 0 indicates hasCompBoundaryAfter(),
 *   except that for contiguous composition (FCC) the tccc must be checked as well.
 *   Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
 *   Thresholds like minNoNo are tested before shifting.
 *
 * - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
 *   to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
 *   See DELTA_TCCC_MASK etc.
 *   This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
 *   minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
 *
 * - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
 *   and ASCII characters are mapped algorithmically only to other ASCII characters.
 *   This helps with hasCompBoundaryBefore() and compose() fast paths.
 *   It is never necessary any more to loop for algorithmic mappings.
 *
 * - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
 *   indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
 *   and separation of the noNo extraData into distinct ranges.
 *   With this, the noNo norm16 value indicates whether the mapping is
 *   compose-normalized, not normalized but hasCompBoundaryBefore(),
 *   not even that, or maps to an empty string.
 *   hasCompBoundaryBefore() can be determined solely from the norm16 value.
 *
 * - The norm16 value for Hangul LVT is now different from that for Hangul LV,
 *   so that hasCompBoundaryAfter() need not check for the syllable type.
 *   For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
 *   For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
 *   The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
 *   to simplify some code.
 *
 * - The extraData firstUnit bit 5 is no longer necessary
 *   (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
 *   is reserved again, and always set to 0.
 *
 * - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
 *   This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
 *   U+00AD Soft Hyphen maps to an empty string,
 *   which is artificially assigned "worst case" values lccc=1 and tccc=255.
 *
 * - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
 *
 * Changes from format version 3 to format version 4 (ICU 63) ------------------
 *
 * Switched from UTrie2 to UCPTrie/CodePointTrie.
 *
 * The new trie no longer stores different values for surrogate code *units* vs.
 * surrogate code *points*.
 * Lead surrogates still have values for optimized UTF-16 string processing.
 * When looking up code point properties, the code now checks for lead surrogates and
 * treats them as inert.
 *
 * gennorm2 now has to reject mappings for surrogate code points.
 * UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
 * custom normalization data file.
 */

#endif  /* !UCONFIG_NO_NORMALIZATION */


/**
 * This class allows one to iterate through all the strings that are canonically equivalent to a given
 * string. For example, here are some sample results:
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
1: \u0041\u030A\u0064\u0307\u0327
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
2: \u0041\u030A\u0064\u0327\u0307
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
3: \u0041\u030A\u1E0B\u0327
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
4: \u0041\u030A\u1E11\u0307
 = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
5: \u00C5\u0064\u0307\u0327
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
6: \u00C5\u0064\u0327\u0307
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
7: \u00C5\u1E0B\u0327
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
8: \u00C5\u1E11\u0307
 = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
9: \u212B\u0064\u0307\u0327
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
10: \u212B\u0064\u0327\u0307
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
11: \u212B\u1E0B\u0327
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
12: \u212B\u1E11\u0307
 = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
 *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
 * since it has not been optimized for that situation.
 *@author M. Davis
 *@draft
 */

// public

U_NAMESPACE_BEGIN

// TODO: add boilerplate methods.

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)

/**
 *@param source string to get results for
 */
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
    pieces(NULL),
    pieces_length(0),
    pieces_lengths(NULL),
    current(NULL),
    current_length(0),
    nfd(*Normalizer2::getNFDInstance(status)),
    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
{
    if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
      setSource(sourceStr, status);
    }
}

CanonicalIterator::~CanonicalIterator() {
  cleanPieces();
}

void CanonicalIterator::cleanPieces() {
    int32_t i = 0;
    if(pieces != NULL) {
        for(i = 0; i < pieces_length; i++) {
            if(pieces[i] != NULL) {
                delete[] pieces[i];
            }
        }
        uprv_free(pieces);
        pieces = NULL;
        pieces_length = 0;
    }
    if(pieces_lengths != NULL) {
        uprv_free(pieces_lengths);
        pieces_lengths = NULL;
    }
    if(current != NULL) {
        uprv_free(current);
        current = NULL;
        current_length = 0;
    }
}

/**
 *@return gets the source: NOTE: it is the NFD form of source
 */
UnicodeString CanonicalIterator::getSource() {
  return source;
}

/**
 * Resets the iterator so that one can start again from the beginning.
 */
void CanonicalIterator::reset() {
    done = FALSE;
    for (int i = 0; i < current_length; ++i) {
        current[i] = 0;
    }
}

/**
 *@return the next string that is canonically equivalent. The value null is returned when
 * the iteration is done.
 */
UnicodeString CanonicalIterator::next() {
    int32_t i = 0;

    if (done) {
      buffer.setToBogus();
      return buffer;
    }

    // delete old contents
    buffer.remove();

    // construct return value

    for (i = 0; i < pieces_length; ++i) {
        buffer.append(pieces[i][current[i]]);
    }
    //String result = buffer.toString(); // not needed

    // find next value for next time

    for (i = current_length - 1; ; --i) {
        if (i < 0) {
            done = TRUE;
            break;
        }
        current[i]++;
        if (current[i] < pieces_lengths[i]) break; // got sequence
        current[i] = 0;
    }
    return buffer;
}

/**
 *@param set the source string to iterate against. This allows the same iterator to be used
 * while changing the source string, saving object creation.
 */
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
    int32_t list_length = 0;
    UChar32 cp = 0;
    int32_t start = 0;
    int32_t i = 0;
    UnicodeString *list = NULL;

    nfd.normalize(newSource, source, status);
    if(U_FAILURE(status)) {
      return;
    }
    done = FALSE;

    cleanPieces();

    // catch degenerate case
    if (newSource.length() == 0) {
        pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
        pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
        pieces_length = 1;
        current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
        current_length = 1;
        if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CleanPartialInitialization;
        }
        current[0] = 0;
        pieces[0] = new UnicodeString[1];
        pieces_lengths[0] = 1;
        if (pieces[0] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CleanPartialInitialization;
        }
        return;
    }


    list = new UnicodeString[source.length()];
    if (list == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto CleanPartialInitialization;
    }

    // i should initialy be the number of code units at the 
    // start of the string
    i = U16_LENGTH(source.char32At(0));
    //int32_t i = 1;
    // find the segments
    // This code iterates through the source string and 
    // extracts segments that end up on a codepoint that
    // doesn't start any decompositions. (Analysis is done
    // on the NFD form - see above).
    for (; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
        if (nfcImpl.isCanonSegmentStarter(cp)) {
            source.extract(start, i-start, list[list_length++]); // add up to i
            start = i;
        }
    }
    source.extract(start, i-start, list[list_length++]); // add last one


    // allocate the arrays, and find the strings that are CE to each segment
    pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
    pieces_length = list_length;
    pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
    current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
    current_length = list_length;
    if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto CleanPartialInitialization;
    }

    for (i = 0; i < current_length; i++) {
        current[i] = 0;
    }
    // for each segment, get all the combinations that can produce 
    // it after NFD normalization
    for (i = 0; i < pieces_length; ++i) {
        //if (PROGRESS) printf("SEGMENT\n");
        pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
    }

    delete[] list;
    return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
    if (list != NULL) {
        delete[] list;
    }
    cleanPieces();
}

/**
 * Dumb recursive implementation of permutation.
 * TODO: optimize
 * @param source the string to find permutations for
 * @return the results in a set.
 */
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return;
    }
    //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
    int32_t i = 0;

    // optimization:
    // if zero or one character, just return a set with it
    // we check for length < 2 to keep from counting code points all the time
    if (source.length() <= 2 && source.countChar32() <= 1) {
        UnicodeString *toPut = new UnicodeString(source);
        /* test for NULL */
        if (toPut == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        result->put(source, toPut, status);
        return;
    }

    // otherwise iterate through the string, and recursively permute all the other characters
    UChar32 cp;
    Hashtable subpermute(status);
    if(U_FAILURE(status)) {
        return;
    }
    subpermute.setValueDeleter(uprv_deleteUObject);

    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
        const UHashElement *ne = NULL;
        int32_t el = UHASH_FIRST;
        UnicodeString subPermuteString = source;

        // optimization:
        // if the character is canonical combining class zero,
        // don't permute it
        if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
            //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
            continue;
        }

        subpermute.removeAll();

        // see what the permutations of the characters before and after this one are
        //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
        /* Test for buffer overflows */
        if(U_FAILURE(status)) {
            return;
        }
        // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents 
        // of source at this point.

        // prefix this character to all of them
        ne = subpermute.nextElement(el);
        while (ne != NULL) {
            UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
            UnicodeString *chStr = new UnicodeString(cp);
            //test for  NULL
            if (chStr == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
            //if (PROGRESS) printf("  Piece: %s\n", UToS(*chStr));
            result->put(*chStr, chStr, status);
            ne = subpermute.nextElement(el);
        }
    }
    //return result;
}

// privates

// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
    Hashtable result(status);
    Hashtable permutations(status);
    Hashtable basic(status);
    if (U_FAILURE(status)) {
        return 0;
    }
    result.setValueDeleter(uprv_deleteUObject);
    permutations.setValueDeleter(uprv_deleteUObject);
    basic.setValueDeleter(uprv_deleteUObject);

    UChar USeg[256];
    int32_t segLen = segment.extract(USeg, 256, status);
    getEquivalents2(&basic, USeg, segLen, status);

    // now get all the permutations
    // add only the ones that are canonically equivalent
    // TODO: optimize by not permuting any class zero.

    const UHashElement *ne = NULL;
    int32_t el = UHASH_FIRST;
    //Iterator it = basic.iterator();
    ne = basic.nextElement(el);
    //while (it.hasNext())
    while (ne != NULL) {
        //String item = (String) it.next();
        UnicodeString item = *((UnicodeString *)(ne->value.pointer));

        permutations.removeAll();
        permute(item, CANITER_SKIP_ZEROES, &permutations, status);
        const UHashElement *ne2 = NULL;
        int32_t el2 = UHASH_FIRST;
        //Iterator it2 = permutations.iterator();
        ne2 = permutations.nextElement(el2);
        //while (it2.hasNext())
        while (ne2 != NULL) {
            //String possible = (String) it2.next();
            //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
            UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
            UnicodeString attempt;
            nfd.normalize(possible, attempt, status);

            // TODO: check if operator == is semanticaly the same as attempt.equals(segment)
            if (attempt==segment) {
                //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
                // TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
                result.put(possible, new UnicodeString(possible), status); //add(possible);
            } else {
                //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
            }

            ne2 = permutations.nextElement(el2);
        }
        ne = basic.nextElement(el);
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return 0;
    }
    // convert into a String[] to clean up storage
    //String[] finalResult = new String[result.size()];
    UnicodeString *finalResult = NULL;
    int32_t resultCount;
    if((resultCount = result.count()) != 0) {
        finalResult = new UnicodeString[resultCount];
        if (finalResult == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }
    else {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    //result.toArray(finalResult);
    result_len = 0;
    el = UHASH_FIRST;
    ne = result.nextElement(el);
    while(ne != NULL) {
        finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
        ne = result.nextElement(el);
    }


    return finalResult;
}

Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

    UnicodeString toPut(segment, segLen);

    fillinResult->put(toPut, new UnicodeString(toPut), status);

    UnicodeSet starts;

    // cycle through all the characters
    UChar32 cp;
    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        U16_GET(segment, 0, i, segLen, cp);
        if (!nfcImpl.getCanonStartSet(cp, starts)) {
            continue;
        }
        // if so, see which decompositions match
        UnicodeSetIterator iter(starts);
        while (iter.next()) {
            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uprv_deleteUObject);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = UHASH_FIRST;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}

/**
 * See if the decomposition of cp2 is at segment starting at segmentPos 
 * (with canonical rearrangment!)
 * If so, take the remainder, and return the equivalents 
 */
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
    //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
    //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);

    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString temp(comp);
    int32_t inputLen=temp.length();
    UnicodeString decompString;
    nfd.normalize(temp, decompString, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (decompString.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    const UChar *decomp=decompString.getBuffer();
    int32_t decompLen=decompString.length();

    // See if it matches the start of segment (at segmentPos)
    UBool ok = FALSE;
    UChar32 cp;
    int32_t decompPos = 0;
    UChar32 decompCp;
    U16_NEXT(decomp, decompPos, decompLen, decompCp);

    int32_t i = segmentPos;
    while(i < segLen) {
        U16_NEXT(segment, i, segLen, cp);

        if (cp == decompCp) { // if equal, eat another cp from decomp

            //if (PROGRESS) printf("  matches: %s\n", UToS(Tr(UnicodeString(cp))));

            if (decompPos == decompLen) { // done, have all decomp characters!
                temp.append(segment+i, segLen-i);
                ok = TRUE;
                break;
            }
            U16_NEXT(decomp, decompPos, decompLen, decompCp);
        } else {
            //if (PROGRESS) printf("  buffer: %s\n", UToS(Tr(UnicodeString(cp))));

            // brute force approach
            temp.append(cp);

            /* TODO: optimize
            // since we know that the classes are monotonically increasing, after zero
            // e.g. 0 5 7 9 0 3
            // we can do an optimization
            // there are only a few cases that work: zero, less, same, greater
            // if both classes are the same, we fail
            // if the decomp class < the segment class, we fail

            segClass = getClass(cp);
            if (decompClass <= segClass) return null;
            */
        }
    }
    if (!ok)
        return NULL; // we failed, characters left over

    //if (PROGRESS) printf("Matches\n");

    if (inputLen == temp.length()) {
        fillinResult->put(UnicodeString(), new UnicodeString(), status);
        return fillinResult; // succeed, but no remainder
    }

    // brute force approach
    // check to make sure result is canonically equivalent
    UnicodeString trial;
    nfd.normalize(temp, trial, status);
    if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
        return NULL;
    }

    return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_NORMALIZATION */
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// characterproperties.cpp
// created: 2018sep03 Markus W. Scherer






// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// umutablecptrie.h (split out of ucptrie.h)
// created: 2018jan24 Markus W. Scherer









U_CDECL_BEGIN

/**
 * \file
 *
 * This file defines a mutable Unicode code point trie.
 *
 * @see UCPTrie
 * @see UMutableCPTrie
 */

/**
 * Mutable Unicode code point trie.
 * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values.
 * For details see http://site.icu-project.org/design/struct/utrie
 *
 * Setting values (especially ranges) and lookup is fast.
 * The mutable trie is only somewhat space-efficient.
 * It builds a compacted, immutable UCPTrie.
 *
 * This trie can be modified while iterating over its contents.
 * For example, it is possible to merge its values with those from another
 * set of ranges (e.g., another mutable or immutable trie):
 * Iterate over those source ranges; for each of them iterate over this trie;
 * add the source value into the value of each trie range.
 *
 * @see UCPTrie
 * @see umutablecptrie_buildImmutable
 * @stable ICU 63
 */
typedef struct UMutableCPTrie UMutableCPTrie;

/**
 * Creates a mutable trie that initially maps each Unicode code point to the same value.
 * It uses 32-bit data values until umutablecptrie_buildImmutable() is called.
 * umutablecptrie_buildImmutable() takes a valueWidth parameter which
 * determines the number of bits in the data value in the resulting UCPTrie.
 * You must umutablecptrie_close() the trie once you are done using it.
 *
 * @param initialValue the initial value that is set for all code points
 * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the trie
 * @stable ICU 63
 */
U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);

/**
 * Clones a mutable trie.
 * You must umutablecptrie_close() the clone once you are done using it.
 *
 * @param other the trie to clone
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the trie clone
 * @stable ICU 63
 */
U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode);

/**
 * Closes a mutable trie and releases associated memory.
 *
 * @param trie the trie
 * @stable ICU 63
 */
U_CAPI void U_EXPORT2
umutablecptrie_close(UMutableCPTrie *trie);

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUMutableCPTriePointer
 * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 63
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close);

U_NAMESPACE_END

#endif

/**
 * Creates a mutable trie with the same contents as the UCPMap.
 * You must umutablecptrie_close() the mutable trie once you are done using it.
 *
 * @param map the source map
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the mutable trie
 * @stable ICU 63
 */
U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode);

/**
 * Creates a mutable trie with the same contents as the immutable one.
 * You must umutablecptrie_close() the mutable trie once you are done using it.
 *
 * @param trie the immutable trie
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the mutable trie
 * @stable ICU 63
 */
U_CAPI UMutableCPTrie * U_EXPORT2
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode);

/**
 * Returns the value for a code point as stored in the trie.
 *
 * @param trie the trie
 * @param c the code point
 * @return the value
 * @stable ICU 63
 */
U_CAPI uint32_t U_EXPORT2
umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);

/**
 * Returns the last code point such that all those from start to there have the same value.
 * Can be used to efficiently iterate over all same-value ranges in a trie.
 * (This is normally faster than iterating over code points and get()ting each value,
 * but much slower than a data structure that stores ranges directly.)
 *
 * The trie can be modified between calls to this function.
 *
 * If the UCPMapValueFilter function pointer is not NULL, then
 * the value to be delivered is passed through that function, and the return value is the end
 * of the range where all values are modified to the same actual value.
 * The value is unchanged if that function pointer is NULL.
 *
 * See the same-signature ucptrie_getRange() for a code sample.
 *
 * @param trie the trie
 * @param start range start
 * @param option defines whether surrogates are treated normally,
 *               or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
 * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
 * @param filter a pointer to a function that may modify the trie data value,
 *     or NULL if the values from the trie are to be used unmodified
 * @param context an opaque pointer that is passed on to the filter function
 * @param pValue if not NULL, receives the value that every code point start..end has;
 *     may have been modified by filter(context, trie value)
 *     if that function pointer is not NULL
 * @return the range end code point, or -1 if start is not a valid code point
 * @stable ICU 63
 */
U_CAPI UChar32 U_EXPORT2
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
                        UCPMapRangeOption option, uint32_t surrogateValue,
                        UCPMapValueFilter *filter, const void *context, uint32_t *pValue);

/**
 * Sets a value for a code point.
 *
 * @param trie the trie
 * @param c the code point
 * @param value the value
 * @param pErrorCode an in/out ICU UErrorCode
 * @stable ICU 63
 */
U_CAPI void U_EXPORT2
umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);

/**
 * Sets a value for each code point [start..end].
 * Faster and more space-efficient than setting the value for each code point separately.
 *
 * @param trie the trie
 * @param start the first code point to get the value
 * @param end the last code point to get the value (inclusive)
 * @param value the value
 * @param pErrorCode an in/out ICU UErrorCode
 * @stable ICU 63
 */
U_CAPI void U_EXPORT2
umutablecptrie_setRange(UMutableCPTrie *trie,
                        UChar32 start, UChar32 end,
                        uint32_t value, UErrorCode *pErrorCode);

/**
 * Compacts the data and builds an immutable UCPTrie according to the parameters.
 * After this, the mutable trie will be empty.
 *
 * The mutable trie stores 32-bit values until buildImmutable() is called.
 * If values shorter than 32 bits are to be stored in the immutable trie,
 * then the upper bits are discarded.
 * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
 * and the value width is 8 bits, then each of these is stored as 0x81
 * and the immutable trie will return that as an unsigned value.
 * (Some implementations may want to make productive temporary use of the upper bits
 * until buildImmutable() discards them.)
 *
 * Not every possible set of mappings can be built into a UCPTrie,
 * because of limitations resulting from speed and space optimizations.
 * Every Unicode assigned character can be mapped to a unique value.
 * Typical data yields data structures far smaller than the limitations.
 *
 * It is possible to construct extremely unusual mappings that exceed the data structure limits.
 * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * @param trie the trie trie
 * @param type selects the trie type
 * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
 *                   then the values stored in the trie will be truncated first
 * @param pErrorCode an in/out ICU UErrorCode
 *
 * @see umutablecptrie_fromUCPTrie
 * @stable ICU 63
 */
U_CAPI UCPTrie * U_EXPORT2
umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
                              UErrorCode *pErrorCode);

U_CDECL_END









// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2004-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ubidi_props.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004dec30
*   created by: Markus W. Scherer
*
*   Low-level Unicode bidi/shaping properties access.
*/








U_CDECL_BEGIN

/* library API -------------------------------------------------------------- */

U_CFUNC void
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

/* property access functions */

U_CFUNC int32_t
ubidi_getMaxValue(UProperty which);

U_CAPI UCharDirection
ubidi_getClass(UChar32 c);

U_CFUNC UBool
ubidi_isMirrored(UChar32 c);

U_CFUNC UChar32
ubidi_getMirror(UChar32 c);

U_CFUNC UBool
ubidi_isBidiControl(UChar32 c);

U_CFUNC UBool
ubidi_isJoinControl(UChar32 c);

U_CFUNC UJoiningType
ubidi_getJoiningType(UChar32 c);

U_CFUNC UJoiningGroup
ubidi_getJoiningGroup(UChar32 c);

U_CFUNC UBidiPairedBracketType
ubidi_getPairedBracketType(UChar32 c);

U_CFUNC UChar32
ubidi_getPairedBracket(UChar32 c);

/* file definitions --------------------------------------------------------- */

#define UBIDI_DATA_NAME "ubidi"
#define UBIDI_DATA_TYPE "icu"

/* format "BiDi" */
#define UBIDI_FMT_0 0x42
#define UBIDI_FMT_1 0x69
#define UBIDI_FMT_2 0x44
#define UBIDI_FMT_3 0x69

/* indexes into indexes[] */
enum {
    UBIDI_IX_INDEX_TOP,
    UBIDI_IX_LENGTH,
    UBIDI_IX_TRIE_SIZE,
    UBIDI_IX_MIRROR_LENGTH,

    UBIDI_IX_JG_START,
    UBIDI_IX_JG_LIMIT,
    UBIDI_IX_JG_START2,  /* new in format version 2.2, ICU 54 */
    UBIDI_IX_JG_LIMIT2,

    UBIDI_MAX_VALUES_INDEX=15,
    UBIDI_IX_TOP=16
};

/* definitions for 16-bit bidi/shaping properties word ---------------------- */

enum {
 /* UBIDI_CLASS_SHIFT=0, */     /* bidi class: 5 bits (4..0) */
    UBIDI_JT_SHIFT=5,           /* joining type: 3 bits (7..5) */

    UBIDI_BPT_SHIFT=8,          /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */

    UBIDI_JOIN_CONTROL_SHIFT=10,
    UBIDI_BIDI_CONTROL_SHIFT=11,

    UBIDI_IS_MIRRORED_SHIFT=12,         /* 'is mirrored' */
    UBIDI_MIRROR_DELTA_SHIFT=13,        /* bidi mirroring delta: 3 bits (15..13) */

    UBIDI_MAX_JG_SHIFT=16               /* max JG value in indexes[UBIDI_MAX_VALUES_INDEX] bits 23..16 */
};

#define UBIDI_CLASS_MASK        0x0000001f
#define UBIDI_JT_MASK           0x000000e0
#define UBIDI_BPT_MASK          0x00000300

#define UBIDI_MAX_JG_MASK       0x00ff0000

#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK)
#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1)

#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
#   define UBIDI_GET_MIRROR_DELTA(props) ((int16_t)(props)>>UBIDI_MIRROR_DELTA_SHIFT)
#else
#   define UBIDI_GET_MIRROR_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UBIDI_MIRROR_DELTA_SHIFT)|0xe000) : ((props)>>UBIDI_MIRROR_DELTA_SHIFT))
#endif

enum {
    UBIDI_ESC_MIRROR_DELTA=-4,
    UBIDI_MIN_MIRROR_DELTA=-3,
    UBIDI_MAX_MIRROR_DELTA=3
};

/* definitions for 32-bit mirror table entry -------------------------------- */

enum {
    /* the source Unicode code point takes 21 bits (20..0) */
    UBIDI_MIRROR_INDEX_SHIFT=21,
    UBIDI_MAX_MIRROR_INDEX=0x7ff
};

#define UBIDI_GET_MIRROR_CODE_POINT(m) (UChar32)((m)&0x1fffff)

#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT)

U_CDECL_END


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2004-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucase.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004aug30
*   created by: Markus W. Scherer
*
*   Low-level Unicode character/string case mapping code.
*/







// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2001-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  utrie2.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2008aug16 (starting from a copy of utrie.h)
*   created by: Markus W. Scherer
*/






U_CDECL_BEGIN

struct UTrie;  /* forward declaration */
#ifndef __UTRIE_H__
typedef struct UTrie UTrie;
#endif

/**
 * \file
 *
 * This is a common implementation of a Unicode trie.
 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
 * Unicode code points (0..0x10ffff). (A map from code points to integers.)
 *
 * This is the second common version of a Unicode trie (hence the name UTrie2).
 * Compared with UTrie version 1:
 * - Still splitting BMP code points 11:5 bits for index and data table lookups.
 * - Still separate data for lead surrogate code _units_ vs. code _points_,
 *   but the lead surrogate code unit values are not required any more
 *   for data lookup for supplementary code points.
 * - The "folding" mechanism is removed. In UTrie version 1, this somewhat
 *   hard-to-explain mechanism was meant to be used for optimized UTF-16
 *   processing, with application-specific encoding of indexing bits
 *   in the lead surrogate data for the associated supplementary code points.
 * - For the last single-value code point range (ending with U+10ffff),
 *   the starting code point ("highStart") and the value are stored.
 * - For supplementary code points U+10000..highStart-1 a three-table lookup
 *   (two index tables and one data table) is used. The first index
 *   is truncated, omitting both the BMP portion and the high range.
 * - There is a special small index for 2-byte UTF-8, and the initial data
 *   entries are designed for fast 1/2-byte UTF-8 lookup.
 *   Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all,
 *   and the associated 2-byte indexes are unused.
 */

/**
 * Trie structure.
 * Use only with public API macros and functions.
 */
struct UTrie2;
typedef struct UTrie2 UTrie2;

/* Public UTrie2 API functions: read-only access ---------------------------- */

/**
 * Selectors for the width of a UTrie2 data value.
 */
enum UTrie2ValueBits {
    /** 16 bits per UTrie2 data value. */
    UTRIE2_16_VALUE_BITS,
    /** 32 bits per UTrie2 data value. */
    UTRIE2_32_VALUE_BITS,
    /** Number of selectors for the width of UTrie2 data values. */
    UTRIE2_COUNT_VALUE_BITS
};
typedef enum UTrie2ValueBits UTrie2ValueBits;

/**
 * Open a frozen trie from its serialized from, stored in 32-bit-aligned memory.
 * Inverse of utrie2_serialize().
 * The memory must remain valid and unchanged as long as the trie is used.
 * You must utrie2_close() the trie once you are done using it.
 *
 * @param valueBits selects the data entry size; results in an
 *                  U_INVALID_FORMAT_ERROR if it does not match the serialized form
 * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2
 * @param length the number of bytes available at data;
 *               can be more than necessary
 * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
 *                      can be NULL
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the unserialized trie
 *
 * @see utrie2_open
 * @see utrie2_serialize
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,
                          const void *data, int32_t length, int32_t *pActualLength,
                          UErrorCode *pErrorCode);

/**
 * Open a frozen, empty "dummy" trie.
 * A dummy trie is an empty trie, used when a real data trie cannot
 * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(),
 * but without internally creating and compacting/serializing the
 * builder data structure.
 *
 * The trie always returns the initialValue,
 * or the errorValue for out-of-range code points and illegal UTF-8.
 *
 * You must utrie2_close() the trie once you are done using it.
 *
 * @param valueBits selects the data entry size
 * @param initialValue the initial value that is set for all code points
 * @param errorValue the value for out-of-range code points and illegal UTF-8
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the dummy trie
 *
 * @see utrie2_openFromSerialized
 * @see utrie2_open
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,
                 uint32_t initialValue, uint32_t errorValue,
                 UErrorCode *pErrorCode);

/**
 * Get a value from a code point as stored in the trie.
 * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower.
 * Easier to use because, unlike the macros, this function works on all UTrie2
 * objects, frozen or not, holding 16-bit or 32-bit data values.
 *
 * @param trie the trie
 * @param c the code point
 * @return the value
 */
U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 *trie, UChar32 c);

/* enumeration callback types */

/**
 * Callback from utrie2_enum(), extracts a uint32_t value from a
 * trie value. This value will be passed on to the UTrie2EnumRange function.
 *
 * @param context an opaque pointer, as passed into utrie2_enum()
 * @param value a value from the trie
 * @return the value that is to be passed on to the UTrie2EnumRange function
 */
typedef uint32_t U_CALLCONV
UTrie2EnumValue(const void *context, uint32_t value);

/**
 * Callback from utrie2_enum(), is called for each contiguous range
 * of code points with the same value as retrieved from the trie and
 * transformed by the UTrie2EnumValue function.
 *
 * The callback function can stop the enumeration by returning FALSE.
 *
 * @param context an opaque pointer, as passed into utrie2_enum()
 * @param start the first code point in a contiguous range with value
 * @param end the last code point in a contiguous range with value (inclusive)
 * @param value the value that is set for all code points in [start..end]
 * @return FALSE to stop the enumeration
 */
typedef UBool U_CALLCONV
UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value);

/**
 * Enumerate efficiently all values in a trie.
 * Do not modify the trie during the enumeration.
 *
 * For each entry in the trie, the value to be delivered is passed through
 * the UTrie2EnumValue function.
 * The value is unchanged if that function pointer is NULL.
 *
 * For each contiguous range of code points with a given (transformed) value,
 * the UTrie2EnumRange function is called.
 *
 * @param trie a pointer to the trie
 * @param enumValue a pointer to a function that may transform the trie entry value,
 *                  or NULL if the values from the trie are to be used directly
 * @param enumRange a pointer to a function that is called for each contiguous range
 *                  of code points with the same (transformed) value
 * @param context an opaque pointer that is passed on to the callback functions
 */
U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 *trie,
            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context);

/* Building a trie ---------------------------------------------------------- */

/**
 * Open an empty, writable trie. At build time, 32-bit data values are used.
 * utrie2_freeze() takes a valueBits parameter
 * which determines the data value width in the serialized and frozen forms.
 * You must utrie2_close() the trie once you are done using it.
 *
 * @param initialValue the initial value that is set for all code points
 * @param errorValue the value for out-of-range code points and illegal UTF-8
 * @param pErrorCode an in/out ICU UErrorCode
 * @return a pointer to the allocated and initialized new trie
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);

/**
 * Clone a trie.
 * You must utrie2_close() the clone once you are done using it.
 *
 * @param other the trie to clone
 * @param pErrorCode an in/out ICU UErrorCode
 * @return a pointer to the new trie clone
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode);

/**
 * Clone a trie. The clone will be mutable/writable even if the other trie
 * is frozen. (See utrie2_freeze().)
 * You must utrie2_close() the clone once you are done using it.
 *
 * @param other the trie to clone
 * @param pErrorCode an in/out ICU UErrorCode
 * @return a pointer to the new trie clone
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode);

/**
 * Close a trie and release associated memory.
 *
 * @param trie the trie
 */
U_CAPI void U_EXPORT2
utrie2_close(UTrie2 *trie);

/**
 * Set a value for a code point.
 *
 * @param trie the unfrozen trie
 * @param c the code point
 * @param value the value
 * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
 * - U_NO_WRITE_PERMISSION if the trie is frozen
 */
U_CAPI void U_EXPORT2
utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);

/**
 * Set a value in a range of code points [start..end].
 * All code points c with start<=c<=end will get the value if
 * overwrite is TRUE or if the old value is the initial value.
 *
 * @param trie the unfrozen trie
 * @param start the first code point to get the value
 * @param end the last code point to get the value (inclusive)
 * @param value the value
 * @param overwrite flag for whether old non-initial values are to be overwritten
 * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
 * - U_NO_WRITE_PERMISSION if the trie is frozen
 */
U_CAPI void U_EXPORT2
utrie2_setRange32(UTrie2 *trie,
                  UChar32 start, UChar32 end,
                  uint32_t value, UBool overwrite,
                  UErrorCode *pErrorCode);

/**
 * Freeze a trie. Make it immutable (read-only) and compact it,
 * ready for serialization and for use with fast macros.
 * Functions to set values will fail after serializing.
 *
 * A trie can be frozen only once. If this function is called again with different
 * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param trie the trie
 * @param valueBits selects the data entry size; if smaller than 32 bits, then
 *                  the values stored in the trie will be truncated
 * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
 * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long
 *                             for serialization
 *                             (the trie will be immutable and usable,
 *                             but not frozen and not usable with the fast macros)
 *
 * @see utrie2_cloneAsThawed
 */
U_CAPI void U_EXPORT2
utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode);

/**
 * Test if the trie is frozen. (See utrie2_freeze().)
 *
 * @param trie the trie
 * @return TRUE if the trie is frozen, that is, immutable, ready for serialization
 *         and for use with fast macros
 */
U_CAPI UBool U_EXPORT2
utrie2_isFrozen(const UTrie2 *trie);

/**
 * Serialize a frozen trie into 32-bit aligned memory.
 * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR.
 * A trie can be serialized multiple times.
 *
 * @param trie the frozen trie
 * @param data a pointer to 32-bit-aligned memory to be filled with the trie data,
 *             can be NULL if capacity==0
 * @param capacity the number of bytes available at data,
 *                 or 0 for preflighting
 * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
 * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
 * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity
 *                            parameters are bad
 * @return the number of bytes written or needed for the trie
 *
 * @see utrie2_openFromSerialized()
 */
U_CAPI int32_t U_EXPORT2
utrie2_serialize(const UTrie2 *trie,
                 void *data, int32_t capacity,
                 UErrorCode *pErrorCode);

/* Public UTrie2 API: miscellaneous functions ------------------------------- */

/**
 * Build a UTrie2 (version 2) from a UTrie (version 1).
 * Enumerates all values in the UTrie and builds a UTrie2 with the same values.
 * The resulting UTrie2 will be frozen.
 *
 * @param trie1 the runtime UTrie structure to be enumerated
 * @param errorValue the value for out-of-range code points and illegal UTF-8
 * @param pErrorCode an in/out ICU UErrorCode
 * @return The frozen UTrie2 with the same values as the UTrie.
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode);

/* Public UTrie2 API macros ------------------------------------------------- */

/*
 * These macros provide fast data lookup from a frozen trie.
 * They will crash when used on an unfrozen trie.
 */

/**
 * Return a 16-bit trie value from a code point, with range checking.
 * Returns trie->errorValue if c is not in the range 0..U+10ffff.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code point
 * @return (uint16_t) The code point's trie value.
 */
#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c))

/**
 * Return a 32-bit trie value from a code point, with range checking.
 * Returns trie->errorValue if c is not in the range 0..U+10ffff.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code point
 * @return (uint32_t) The code point's trie value.
 */
#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c))

/**
 * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
 * and get a 16-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param src (const UChar *, in/out) the source text pointer
 * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
 * @param c (UChar32, out) variable for the code point
 * @param result (uint16_t, out) uint16_t variable for the trie lookup result
 */
#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result)

/**
 * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
 * and get a 32-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param src (const UChar *, in/out) the source text pointer
 * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
 * @param c (UChar32, out) variable for the code point
 * @param result (uint32_t, out) uint32_t variable for the trie lookup result
 */
#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result)

/**
 * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
 * and get a 16-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param start (const UChar *, in) the start pointer for the text
 * @param src (const UChar *, in/out) the source text pointer
 * @param c (UChar32, out) variable for the code point
 * @param result (uint16_t, out) uint16_t variable for the trie lookup result
 */
#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result)

/**
 * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
 * and get a 32-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param start (const UChar *, in) the start pointer for the text
 * @param src (const UChar *, in/out) the source text pointer
 * @param c (UChar32, out) variable for the code point
 * @param result (uint32_t, out) uint32_t variable for the trie lookup result
 */
#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result)

/**
 * UTF-8: Post-increment src and get a 16-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param src (const char *, in/out) the source text pointer
 * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
 * @param result (uint16_t, out) uint16_t variable for the trie lookup result
 */
#define UTRIE2_U8_NEXT16(trie, src, limit, result)\
    _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result)

/**
 * UTF-8: Post-increment src and get a 32-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param src (const char *, in/out) the source text pointer
 * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
 * @param result (uint16_t, out) uint32_t variable for the trie lookup result
 */
#define UTRIE2_U8_NEXT32(trie, src, limit, result) \
    _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result)

/**
 * UTF-8: Pre-decrement src and get a 16-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param start (const char *, in) the start pointer for the text
 * @param src (const char *, in/out) the source text pointer
 * @param result (uint16_t, out) uint16_t variable for the trie lookup result
 */
#define UTRIE2_U8_PREV16(trie, start, src, result) \
    _UTRIE2_U8_PREV(trie, data16, index, start, src, result)

/**
 * UTF-8: Pre-decrement src and get a 32-bit value from the trie.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param start (const char *, in) the start pointer for the text
 * @param src (const char *, in/out) the source text pointer
 * @param result (uint16_t, out) uint32_t variable for the trie lookup result
 */
#define UTRIE2_U8_PREV32(trie, start, src, result) \
    _UTRIE2_U8_PREV(trie, data32, data32, start, src, result)

/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */

/*
 * The following functions and macros are used for highly optimized UTF-16
 * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these.
 *
 * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_.
 * UTF-16 text processing can be optimized by detecting surrogate pairs and
 * assembling supplementary code points only when there is non-trivial data
 * available.
 *
 * At build-time, use utrie2_enumForLeadSurrogate() to see if there
 * is non-trivial (non-initialValue) data for any of the supplementary
 * code points associated with a lead surrogate.
 * If so, then set a special (application-specific) value for the
 * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit().
 *
 * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or
 * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial
 * data and the code unit is a lead surrogate, then check if a trail surrogate
 * follows. If so, assemble the supplementary code point with
 * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP()
 * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead
 * surrogate's value or do a code point lookup for it.
 *
 * If there is only trivial data for lead and trail surrogates, then processing
 * can often skip them. For example, in normalization or case mapping
 * all characters that do not have any mappings are simply copied as is.
 */

/**
 * Get a value from a lead surrogate code unit as stored in the trie.
 *
 * @param trie the trie
 * @param c the code unit (U+D800..U+DBFF)
 * @return the value
 */
U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c);

/**
 * Enumerate the trie values for the 1024=0x400 code points
 * corresponding to a given lead surrogate.
 * For example, for the lead surrogate U+D87E it will enumerate the values
 * for [U+2F800..U+2FC00[.
 * Used by data builder code that sets special lead surrogate code unit values
 * for optimized UTF-16 string processing.
 *
 * Do not modify the trie during the enumeration.
 *
 * Except for the limited code point range, this functions just like utrie2_enum():
 * For each entry in the trie, the value to be delivered is passed through
 * the UTrie2EnumValue function.
 * The value is unchanged if that function pointer is NULL.
 *
 * For each contiguous range of code points with a given (transformed) value,
 * the UTrie2EnumRange function is called.
 *
 * @param trie a pointer to the trie
 * @param enumValue a pointer to a function that may transform the trie entry value,
 *                  or NULL if the values from the trie are to be used directly
 * @param enumRange a pointer to a function that is called for each contiguous range
 *                  of code points with the same (transformed) value
 * @param context an opaque pointer that is passed on to the callback functions
 */
U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
                            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
                            const void *context);

/**
 * Set a value for a lead surrogate code unit.
 *
 * @param trie the unfrozen trie
 * @param lead the lead surrogate code unit (U+D800..U+DBFF)
 * @param value the value
 * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
 * - U_NO_WRITE_PERMISSION if the trie is frozen
 */
U_CAPI void U_EXPORT2
utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
                                     UChar32 lead, uint32_t value,
                                     UErrorCode *pErrorCode);

/**
 * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
 * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates,
 * but smaller and faster.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
 * @return (uint16_t) The code unit's trie value.
 */
#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c)

/**
 * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
 * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates,
 * but smaller and faster.
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
 * @return (uint32_t) The code unit's trie value.
 */
#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c)

/**
 * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff).
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
 * @return (uint16_t) The code point's trie value.
 */
#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c)

/**
 * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff).
 *
 * @param trie (const UTrie2 *, in) a frozen trie
 * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
 * @return (uint32_t) The code point's trie value.
 */
#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c)

U_CDECL_END

/* C++ convenience wrappers ------------------------------------------------- */

#ifdef __cplusplus




U_NAMESPACE_BEGIN

// Use the Forward/Backward subclasses below.
class UTrie2StringIterator : public UMemory {
public:
    UTrie2StringIterator(const UTrie2 *t, const UChar *p) :
        trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {}

    const UTrie2 *trie;
    const UChar *codePointStart, *codePointLimit;
    UChar32 codePoint;
};

class BackwardUTrie2StringIterator : public UTrie2StringIterator {
public:
    BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) :
        UTrie2StringIterator(t, p), start(s) {}

    uint16_t previous16();

    const UChar *start;
};

class ForwardUTrie2StringIterator : public UTrie2StringIterator {
public:
    // Iteration limit l can be NULL.
    // In that case, the caller must detect c==0 and stop.
    ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) :
        UTrie2StringIterator(t, p), limit(l) {}

    uint16_t next16();

    const UChar *limit;
};

U_NAMESPACE_END

#endif

/* Internal definitions ----------------------------------------------------- */

U_CDECL_BEGIN

/** Build-time trie structure. */
struct UNewTrie2;
typedef struct UNewTrie2 UNewTrie2;

/*
 * Trie structure definition.
 *
 * Either the data table is 16 bits wide and accessed via the index
 * pointer, with each index item increased by indexLength;
 * in this case, data32==NULL, and data16 is used for direct ASCII access.
 *
 * Or the data table is 32 bits wide and accessed via the data32 pointer.
 */
struct UTrie2 {
    /* protected: used by macros and functions for reading values */
    const uint16_t *index;
    const uint16_t *data16;     /* for fast UTF-8 ASCII access, if 16b data */
    const uint32_t *data32;     /* NULL if 16b data is used via index */

    int32_t indexLength, dataLength;
    uint16_t index2NullOffset;  /* 0xffff if there is no dedicated index-2 null block */
    uint16_t dataNullOffset;
    uint32_t initialValue;
    /** Value returned for out-of-range code points and illegal UTF-8. */
    uint32_t errorValue;

    /* Start of the last range which ends at U+10ffff, and its value. */
    UChar32 highStart;
    int32_t highValueIndex;

    /* private: used by builder and unserialization functions */
    void *memory;           /* serialized bytes; NULL if not frozen yet */
    int32_t length;         /* number of serialized bytes at memory; 0 if not frozen yet */
    UBool isMemoryOwned;    /* TRUE if the trie owns the memory */
    UBool padding1;
    int16_t padding2;
    UNewTrie2 *newTrie;     /* builder object; NULL when frozen */

#ifdef UTRIE2_DEBUG
    const char *name;
#endif
};

/**
 * Trie constants, defining shift widths, index array lengths, etc.
 *
 * These are needed for the runtime macros but users can treat these as
 * implementation details and skip to the actual public API further below.
 */
enum {
    /** Shift size for getting the index-1 table offset. */
    UTRIE2_SHIFT_1=6+5,

    /** Shift size for getting the index-2 table offset. */
    UTRIE2_SHIFT_2=5,

    /**
     * Difference between the two shift sizes,
     * for getting an index-1 offset from an index-2 offset. 6=11-5
     */
    UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2,

    /**
     * Number of index-1 entries for the BMP. 32=0x20
     * This part of the index-1 table is omitted from the serialized form.
     */
    UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1,

    /** Number of code points per index-1 table entry. 2048=0x800 */
    UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1,

    /** Number of entries in an index-2 block. 64=0x40 */
    UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2,

    /** Mask for getting the lower bits for the in-index-2-block offset. */
    UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1,

    /** Number of entries in a data block. 32=0x20 */
    UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2,

    /** Mask for getting the lower bits for the in-data-block offset. */
    UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1,

    /**
     * Shift size for shifting left the index array values.
     * Increases possible data size with 16-bit index values at the cost
     * of compactability.
     * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
     */
    UTRIE2_INDEX_SHIFT=2,

    /** The alignment size of a data block. Also the granularity for compaction. */
    UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT,

    /* Fixed layout of the first part of the index array. ------------------- */

    /**
     * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
     * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
     */
    UTRIE2_INDEX_2_OFFSET=0,

    /**
     * The part of the index-2 table for U+D800..U+DBFF stores values for
     * lead surrogate code _units_ not code _points_.
     * Values for lead surrogate code _points_ are indexed with this portion of the table.
     * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
     */
    UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2,
    UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2,

    /** Count the lengths of both BMP pieces. 2080=0x820 */
    UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH,

    /**
     * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
     * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
     */
    UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
    UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6,  /* U+0800 is the first code point after 2-byte UTF-8 */

    /**
     * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
     * Variable length, for code points up to highStart, where the last single-value range starts.
     * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
     * (For 0x100000 supplementary code points U+10000..U+10ffff.)
     *
     * The part of the index-2 table for supplementary code points starts
     * after this index-1 table.
     *
     * Both the index-1 table and the following part of the index-2 table
     * are omitted completely if there is only BMP data.
     */
    UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH,
    UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1,

    /*
     * Fixed layout of the first part of the data array. -----------------------
     * Starts with 4 blocks (128=0x80 entries) for ASCII.
     */

    /**
     * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
     * Used with linear access for single bytes 0..0xbf for simple error handling.
     * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
     */
    UTRIE2_BAD_UTF8_DATA_OFFSET=0x80,

    /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
    UTRIE2_DATA_START_OFFSET=0xc0
};

/* Internal functions and macros -------------------------------------------- */

/**
 * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations.
 * Do not call directly.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
                           const uint8_t *src, const uint8_t *limit);

/**
 * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations.
 * Do not call directly.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
                           const uint8_t *start, const uint8_t *src);


/** Internal low-level trie getter. Returns a data index. */
#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \
    (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \
    <<UTRIE2_INDEX_SHIFT)+ \
    ((c)&UTRIE2_DATA_MASK))

/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data index. */
#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c)

/** Internal trie getter from a lead surrogate code point (D800..DBFF). Returns the data index. */
#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \
    _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c)

/** Internal trie getter from a BMP code point. Returns the data index. */
#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \
    _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
                      trieIndex, c)

/** Internal trie getter from a supplementary code point below highStart. Returns the data index. */
#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
    (((int32_t)((trieIndex)[ \
        (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
                      ((c)>>UTRIE2_SHIFT_1)]+ \
        (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
    <<UTRIE2_INDEX_SHIFT)+ \
    ((c)&UTRIE2_DATA_MASK))

/**
 * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
 * Returns the data index.
 */
#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
    ((uint32_t)(c)<0xd800 ? \
        _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
        (uint32_t)(c)<=0xffff ? \
            _UTRIE2_INDEX_RAW( \
                (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
                (trie)->index, c) : \
            (uint32_t)(c)>0x10ffff ? \
                (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
                (c)>=(trie)->highStart ? \
                    (trie)->highValueIndex : \
                    _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))

/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data. */
#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \
    (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)]

/** Internal trie getter from a supplementary code point. Returns the data. */
#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \
    (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \
                 _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)]

/**
 * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
 * Returns the data.
 */
#define _UTRIE2_GET(trie, data, asciiOffset, c) \
    (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]

/** Internal next-post-increment: get the next code point (c) and its data. */
#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    { \
        uint16_t __c2; \
        (c)=*(src)++; \
        if(!U16_IS_LEAD(c)) { \
            (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \
        } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \
            (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \
        } else { \
            ++(src); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/** Internal pre-decrement-previous: get the previous code point (c) and its data */
#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
    { \
        uint16_t __c2; \
        (c)=*--(src); \
        if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \
            (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \
        } else { \
            --(src); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/** Internal UTF-8 next-post-increment: get the next code point's data. */
#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __lead=(uint8_t)*(src)++; \
    if(U8_IS_SINGLE(__lead)) { \
        (result)=(trie)->ascii[__lead]; \
    } else { \
        uint8_t __t1, __t2; \
        if( /* handle U+0800..U+FFFF inline */ \
            0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \
            U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \
            (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
        ) { \
            (src)+=2; \
            (result)=(trie)->data[ \
                ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
                                         ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
                <<UTRIE2_INDEX_SHIFT)+ \
                (__t2&UTRIE2_DATA_MASK)]; \
        } else if( /* handle U+0080..U+07FF inline */ \
            __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \
            (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
        ) { \
            ++(src); \
            (result)=(trie)->data[ \
                (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
                __t1]; \
        } else { \
            int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
                                                                       (const uint8_t *)(limit)); \
            (src)+=__index&7; \
            (result)=(trie)->data[__index>>3]; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */
#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(uint8_t)*--(src); \
    if(U8_IS_SINGLE(__b)) { \
        (result)=(trie)->ascii[__b]; \
    } else { \
        int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
                                                                (const uint8_t *)(src)); \
        (src)-=__index&7; \
        (result)=(trie)->data[__index>>3]; \
    } \
} UPRV_BLOCK_MACRO_END

U_CDECL_END



#ifdef __cplusplus
U_NAMESPACE_BEGIN

class UnicodeString;

U_NAMESPACE_END
#endif

/* library API -------------------------------------------------------------- */

U_CFUNC void U_EXPORT2
ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

/**
 * Requires non-NULL locale ID but otherwise does the equivalent of
 * checking for language codes as if uloc_getLanguage() were called:
 * Accepts both 2- and 3-letter codes and accepts case variants.
 */
U_CFUNC int32_t
ucase_getCaseLocale(const char *locale);

/* Casing locale types for ucase_getCaseLocale */
enum {
    UCASE_LOC_UNKNOWN,
    UCASE_LOC_ROOT,
    UCASE_LOC_TURKISH,
    UCASE_LOC_LITHUANIAN,
    UCASE_LOC_GREEK,
    UCASE_LOC_DUTCH
};

/**
 * Bit mask for getting just the options from a string compare options word
 * that are relevant for case-insensitive string comparison.
 * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
 * @internal
 */
#define _STRCASECMP_OPTIONS_MASK 0xffff

/**
 * Bit mask for getting just the options from a string compare options word
 * that are relevant for case folding (of a single string or code point).
 *
 * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
 * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
 * It is conceivable that at some point we might want the option to use only simple case foldings
 * when operating on strings.
 *
 * See stringoptions.h.
 * @internal
 */
#define _FOLD_CASE_OPTIONS_MASK 7

/* single-code point functions */

U_CAPI UChar32 U_EXPORT2
ucase_tolower(UChar32 c);

U_CAPI UChar32 U_EXPORT2
ucase_toupper(UChar32 c);

U_CAPI UChar32 U_EXPORT2
ucase_totitle(UChar32 c);

U_CAPI UChar32 U_EXPORT2
ucase_fold(UChar32 c, uint32_t options);

/**
 * Adds all simple case mappings and the full case folding for c to sa,
 * and also adds special case closure mappings.
 * c itself is not added.
 * For example, the mappings
 * - for s include long s
 * - for sharp s include ss
 * - for k include the Kelvin sign
 */
U_CFUNC void U_EXPORT2
ucase_addCaseClosure(UChar32 c, const USetAdder *sa);

/**
 * Maps the string to single code points and adds the associated case closure
 * mappings.
 * The string is mapped to code points if it is their full case folding string.
 * In other words, this performs a reverse full case folding and then
 * adds the case closure items of the resulting code points.
 * If the string is found and its closure applied, then
 * the string itself is added as well as part of its code points' closure.
 * It must be length>=0.
 *
 * @return TRUE if the string was found
 */
U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa);

#ifdef __cplusplus
U_NAMESPACE_BEGIN

/**
 * Iterator over characters with more than one code point in the full default Case_Folding.
 */
class U_COMMON_API FullCaseFoldingIterator {
public:
    /** Constructor. */
    FullCaseFoldingIterator();
    /**
     * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
     * Returns a negative cp value at the end of the iteration.
     */
    UChar32 next(UnicodeString &full);
private:
    FullCaseFoldingIterator(const FullCaseFoldingIterator &);  // no copy
    FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &);  // no assignment

    const UChar *unfold;
    int32_t unfoldRows;
    int32_t unfoldRowWidth;
    int32_t unfoldStringWidth;
    int32_t currentRow;
    int32_t rowCpIndex;
};

/**
 * Fast case mapping data for ASCII/Latin.
 * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
 * Deltas must not cross the ASCII boundary, or else they cannot be easily used
 * in simple UTF-8 code.
 */
namespace LatinCase {

/** Case mapping/folding data for code points up to U+017F. */
constexpr UChar LIMIT = 0x180;
/** U+017F case-folds and uppercases crossing the ASCII boundary. */
constexpr UChar LONG_S = 0x17f;
/** Exception: Complex mapping, or too-large delta. */
constexpr int8_t EXC = -0x80;

/** Deltas for lowercasing for most locales, and default case folding. */
extern const int8_t TO_LOWER_NORMAL[LIMIT];
/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
extern const int8_t TO_LOWER_TR_LT[LIMIT];

/** Deltas for uppercasing for most locales. */
extern const int8_t TO_UPPER_NORMAL[LIMIT];
/** Deltas for uppercasing for tr/az. */
extern const int8_t TO_UPPER_TR[LIMIT];

}  // namespace LatinCase

U_NAMESPACE_END
#endif

/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
U_CAPI int32_t U_EXPORT2
ucase_getType(UChar32 c);

/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */
U_CAPI int32_t U_EXPORT2
ucase_getTypeOrIgnorable(UChar32 c);

U_CAPI UBool U_EXPORT2
ucase_isSoftDotted(UChar32 c);

U_CAPI UBool U_EXPORT2
ucase_isCaseSensitive(UChar32 c);

/* string case mapping functions */

U_CDECL_BEGIN

/**
 * Iterator function for string case mappings, which need to look at the
 * context (surrounding text) of a given character for conditional mappings.
 *
 * The iterator only needs to go backward or forward away from the
 * character in question. It does not use any indexes on this interface.
 * It does not support random access or an arbitrary change of
 * iteration direction.
 *
 * The code point being case-mapped itself is never returned by
 * this iterator.
 *
 * @param context A pointer to the iterator's working data.
 * @param dir If <0 then start iterating backward from the character;
 *            if >0 then start iterating forward from the character;
 *            if 0 then continue iterating in the current direction.
 * @return Next code point, or <0 when the iteration is done.
 */
typedef UChar32 U_CALLCONV
UCaseContextIterator(void *context, int8_t dir);

/**
 * Sample struct which may be used by some implementations of
 * UCaseContextIterator.
 */
struct UCaseContext {
    void *p;
    int32_t start, index, limit;
    int32_t cpStart, cpLimit;
    int8_t dir;
    int8_t b1, b2, b3;
};
typedef struct UCaseContext UCaseContext;

U_CDECL_END

#define UCASECONTEXT_INITIALIZER { NULL,  0, 0, 0,  0, 0,  0,  0, 0, 0 }

enum {
    /**
     * For string case mappings, a single character (a code point) is mapped
     * either to itself (in which case in-place mapping functions do nothing),
     * or to another single code point, or to a string.
     * Aside from the string contents, these are indicated with a single int32_t
     * value as follows:
     *
     * Mapping to self: Negative values (~self instead of -self to support U+0000)
     *
     * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH
     *
     * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is
     * returned. Note that the string result may indeed have zero length.
     */
    UCASE_MAX_STRING_LENGTH=0x1f
};

/**
 * Get the full lowercase mapping for c.
 *
 * @param csp Case mapping properties.
 * @param c Character to be mapped.
 * @param iter Character iterator, used for context-sensitive mappings.
 *             See UCaseContextIterator for details.
 *             If iter==NULL then a context-independent result is returned.
 * @param context Pointer to be passed into iter.
 * @param pString If the mapping result is a string, then the pointer is
 *                written to *pString.
 * @param caseLocale Case locale value from ucase_getCaseLocale().
 * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
 *
 * @see UCaseContextIterator
 * @see UCASE_MAX_STRING_LENGTH
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ucase_toFullLower(UChar32 c,
                  UCaseContextIterator *iter, void *context,
                  const UChar **pString,
                  int32_t caseLocale);

U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(UChar32 c,
                  UCaseContextIterator *iter, void *context,
                  const UChar **pString,
                  int32_t caseLocale);

U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(UChar32 c,
                  UCaseContextIterator *iter, void *context,
                  const UChar **pString,
                  int32_t caseLocale);

U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(UChar32 c,
                    const UChar **pString,
                    uint32_t options);

U_CFUNC int32_t U_EXPORT2
ucase_hasBinaryProperty(UChar32 c, UProperty which);


U_CDECL_BEGIN

/**
 * @internal
 */
typedef int32_t U_CALLCONV
UCaseMapFull(UChar32 c,
             UCaseContextIterator *iter, void *context,
             const UChar **pString,
             int32_t caseLocale);

U_CDECL_END

/* file definitions --------------------------------------------------------- */

#define UCASE_DATA_NAME "ucase"
#define UCASE_DATA_TYPE "icu"

/* format "cAsE" */
#define UCASE_FMT_0 0x63
#define UCASE_FMT_1 0x41
#define UCASE_FMT_2 0x53
#define UCASE_FMT_3 0x45

/* indexes into indexes[] */
enum {
    UCASE_IX_INDEX_TOP,
    UCASE_IX_LENGTH,
    UCASE_IX_TRIE_SIZE,
    UCASE_IX_EXC_LENGTH,
    UCASE_IX_UNFOLD_LENGTH,

    UCASE_IX_MAX_FULL_LENGTH=15,
    UCASE_IX_TOP=16
};

/* definitions for 16-bit case properties word ------------------------------ */

U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie();

/* 2-bit constants for types of cased characters */
#define UCASE_TYPE_MASK     3
enum {
    UCASE_NONE,
    UCASE_LOWER,
    UCASE_UPPER,
    UCASE_TITLE
};

#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)

#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)

#define UCASE_IGNORABLE         4
#define UCASE_EXCEPTION         8
#define UCASE_SENSITIVE         0x10

#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)

#define UCASE_DOT_MASK      0x60
enum {
    UCASE_NO_DOT=0,         /* normal characters with cc=0 */
    UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
    UCASE_ABOVE=0x40,       /* "above" accents with cc=230 */
    UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */
};

/* no exception: bits 15..7 are a 9-bit signed case mapping delta */
#define UCASE_DELTA_SHIFT   7
#define UCASE_DELTA_MASK    0xff80
#define UCASE_MAX_DELTA     0xff
#define UCASE_MIN_DELTA     (-UCASE_MAX_DELTA-1)

#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
#   define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
#else
#   define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
#endif

/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
#define UCASE_EXC_SHIFT     4
#define UCASE_EXC_MASK      0xfff0
#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)

/* definitions for 16-bit main exceptions word ------------------------------ */

/* first 8 bits indicate values in optional slots */
enum {
    UCASE_EXC_LOWER,
    UCASE_EXC_FOLD,
    UCASE_EXC_UPPER,
    UCASE_EXC_TITLE,
    UCASE_EXC_DELTA,
    UCASE_EXC_5,            /* reserved */
    UCASE_EXC_CLOSURE,
    UCASE_EXC_FULL_MAPPINGS,
    UCASE_EXC_ALL_SLOTS     /* one past the last slot */
};

/* each slot is 2 uint16_t instead of 1 */
#define UCASE_EXC_DOUBLE_SLOTS      0x100

enum {
    UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200,
    UCASE_EXC_DELTA_IS_NEGATIVE=0x400,
    UCASE_EXC_SENSITIVE=0x800
};

/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
#define UCASE_EXC_DOT_SHIFT     7

/* normally stored in the main word, but pushed out for larger exception indexes */
#define UCASE_EXC_DOT_MASK      0x3000
enum {
    UCASE_EXC_NO_DOT=0,
    UCASE_EXC_SOFT_DOTTED=0x1000,
    UCASE_EXC_ABOVE=0x2000,         /* "above" accents with cc=230 */
    UCASE_EXC_OTHER_ACCENT=0x3000   /* other character (0<cc!=230) */
};

/* complex/conditional mappings */
#define UCASE_EXC_CONDITIONAL_SPECIAL   0x4000
#define UCASE_EXC_CONDITIONAL_FOLD      0x8000

/* definitions for lengths word for full case mappings */
#define UCASE_FULL_LOWER    0xf
#define UCASE_FULL_FOLDING  0xf0
#define UCASE_FULL_UPPER    0xf00
#define UCASE_FULL_TITLE    0xf000

/* maximum lengths */
#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf)
#define UCASE_CLOSURE_MAX_LENGTH 0xf

/* constants for reverse case folding ("unfold") data */
enum {
    UCASE_UNFOLD_ROWS,
    UCASE_UNFOLD_ROW_WIDTH,
    UCASE_UNFOLD_STRING_WIDTH
};


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 2001-2016, International Business Machines
*                Corporation and others. All Rights Reserved.
******************************************************************************
*   file name:  ucln_cmn.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001July05
*   created by: George Rhoten
*/



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 2001-2013, International Business Machines
*                Corporation and others. All Rights Reserved.
*
******************************************************************************
*   file name:  ucln.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001July05
*   created by: George Rhoten
*/




/** These are the functions used to register a library's memory cleanup
 * functions.  Each library should define a single library register function
 * to call this API.  In the i18n library, it is ucln_i18n_registerCleanup().
 *
 * None of the cleanup functions should use a mutex to clean up an API's
 * allocated memory because a cleanup function is not meant to be thread safe,
 * and plenty of data cannot be reference counted in order to make sure that
 * no one else needs the allocated data.
 *
 * In order to make a cleanup function get called when u_cleanup is called,
 * You should add your function to the library specific cleanup function.
 * If the cleanup function is not in the common library, the code that
 * allocates the memory should call the library specific cleanup function.
 * For instance, in the i18n library, any memory allocated statically must
 * call ucln_i18n_registerCleanup() from the ucln_in.h header.  These library
 * cleanup functions are needed in order to prevent a circular dependency
 * between the common library and any other library.
 *
 * The order of the cleanup is very important.  In general, an API that
 * depends on a second API should be cleaned up before the second API.
 * For instance, the default converter in ustring depends upon the converter
 * API.  So the default converter should be closed before the converter API
 * has its cache flushed.  This will prevent any memory leaks due to
 * reference counting.
 *
 * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
 */

/**
 * Data Type for cleanup function selector. These roughly correspond to libraries.
 */
typedef enum ECleanupLibraryType {
    UCLN_START = -1,
    UCLN_UPLUG,     /* ICU plugins */
    UCLN_CUSTOM,    /* Custom is for anyone else. */
    UCLN_CTESTFW,
    UCLN_TOOLUTIL,
    UCLN_LAYOUTEX,
    UCLN_LAYOUT,
    UCLN_IO,
    UCLN_I18N,
    UCLN_COMMON /* This must be the last one to cleanup. */
} ECleanupLibraryType;

/**
 * Data type for cleanup function pointer
 */
U_CDECL_BEGIN
typedef UBool U_CALLCONV cleanupFunc(void);
typedef void U_CALLCONV initFunc(UErrorCode *);
U_CDECL_END

/**
 * Register a cleanup function
 * @param type which library to register for.
 * @param func the function pointer
 */
U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
                                           cleanupFunc *func);

/**
 * Request cleanup for one specific library.
 * Not thread safe.
 * @param type which library to cleanup
 */
U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);



/* These are the cleanup functions for various APIs. */
/* @return true if cleanup complete successfully.*/
U_CFUNC UBool utrace_cleanup(void);

U_CFUNC UBool ucln_lib_cleanup(void);

/*
Please keep the order of enums declared in same order
as the cleanup functions are suppose to be called. */
typedef enum ECleanupCommonType {
    UCLN_COMMON_START = -1,
    UCLN_COMMON_NUMPARSE_UNISETS,
    UCLN_COMMON_USPREP,
    UCLN_COMMON_BREAKITERATOR,
    UCLN_COMMON_RBBI,
    UCLN_COMMON_SERVICE,
    UCLN_COMMON_LOCALE_KEY_TYPE,
    UCLN_COMMON_LOCALE,
    UCLN_COMMON_LOCALE_AVAILABLE,
    UCLN_COMMON_LIKELY_SUBTAGS,
    UCLN_COMMON_LOCALE_DISTANCE,
    UCLN_COMMON_ULOC,
    UCLN_COMMON_CURRENCY,
    UCLN_COMMON_LOADED_NORMALIZER2,
    UCLN_COMMON_NORMALIZER2,
    UCLN_COMMON_CHARACTERPROPERTIES,
    UCLN_COMMON_USET,
    UCLN_COMMON_UNAMES,
    UCLN_COMMON_UPROPS,
    UCLN_COMMON_UCNV,
    UCLN_COMMON_UCNV_IO,
    UCLN_COMMON_UDATA,
    UCLN_COMMON_PUTIL,
    UCLN_COMMON_UINIT,

    /*
       Unified caches caches collation stuff. Collation data structures
       contain resource bundles which means that unified cache cleanup
       must happen before resource bundle clean up.
    */
    UCLN_COMMON_UNIFIED_CACHE,
    UCLN_COMMON_URES,
    UCLN_COMMON_MUTEX,    // Mutexes should be the last to be cleaned up.
    UCLN_COMMON_COUNT /* This must be last */
} ECleanupCommonType;

/* Main library cleanup registration function. */
/* See common/ucln.h for details on adding a cleanup function. */
/* Note: the global mutex must not be held when calling this function. */
U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
                                                   cleanupFunc *func);



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uprops.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb24
*   created by: Markus W. Scherer
*
*   Constants for mostly non-core Unicode character properties
*   stored in uprops.icu.
*/







/* indexes[] entries */
enum {
    UPROPS_PROPS32_INDEX,
    UPROPS_EXCEPTIONS_INDEX,
    UPROPS_EXCEPTIONS_TOP_INDEX,

    UPROPS_ADDITIONAL_TRIE_INDEX,
    UPROPS_ADDITIONAL_VECTORS_INDEX,
    UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX,

    UPROPS_SCRIPT_EXTENSIONS_INDEX,

    UPROPS_RESERVED_INDEX_7,
    UPROPS_RESERVED_INDEX_8,

    /* size of the data file (number of 32-bit units after the header) */
    UPROPS_DATA_TOP_INDEX,

    /* maximum values for code values in vector word 0 */
    UPROPS_MAX_VALUES_INDEX=10,
    /* maximum values for code values in vector word 2 */
    UPROPS_MAX_VALUES_2_INDEX,

    UPROPS_INDEX_COUNT=16
};

/* definitions for the main properties words */
enum {
    /* general category shift==0                                0 (5 bits) */
    /* reserved                                                 5 (1 bit) */
    UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6                       /*  6 (10 bits) */
};

#define GET_CATEGORY(props) ((props)&0x1f)
#define CAT_MASK(props) U_MASK(GET_CATEGORY(props))

#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT)

/* constants for the storage form of numeric types and values */
enum {
    /** No numeric value. */
    UPROPS_NTV_NONE=0,
    /** Decimal digits: nv=0..9 */
    UPROPS_NTV_DECIMAL_START=1,
    /** Other digits: nv=0..9 */
    UPROPS_NTV_DIGIT_START=11,
    /** Small integers: nv=0..154 */
    UPROPS_NTV_NUMERIC_START=21,
    /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
    UPROPS_NTV_FRACTION_START=0xb0,
    /**
     * Large integers:
     * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
     * (only one significant decimal digit)
     */
    UPROPS_NTV_LARGE_START=0x1e0,
    /**
     * Sexagesimal numbers:
     * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
     */
    UPROPS_NTV_BASE60_START=0x300,
    /**
     * Fraction-20 values:
     * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
     * numerator: num = 2*(frac20&3)+1
     * denominator: den = 20<<(frac20>>2)
     */
    UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36,  // 0x300+9*4=0x324
    /**
     * Fraction-32 values:
     * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
     * numerator: num = 2*(frac32&3)+1
     * denominator: den = 32<<(frac32>>2)
     */
    UPROPS_NTV_FRACTION32_START=UPROPS_NTV_FRACTION20_START+24,  // 0x324+6*4=0x34c
    /** No numeric value (yet). */
    UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION32_START+16,  // 0x34c+4*4=0x35c

    UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1
};

#define UPROPS_NTV_GET_TYPE(ntv) \
    ((ntv==UPROPS_NTV_NONE) ? U_NT_NONE : \
    (ntv<UPROPS_NTV_DIGIT_START) ?  U_NT_DECIMAL : \
    (ntv<UPROPS_NTV_NUMERIC_START) ? U_NT_DIGIT : \
    U_NT_NUMERIC)

/* number of properties vector words */
#define UPROPS_VECTOR_WORDS     3

/*
 * Properties in vector word 0
 * Bits
 * 31..24   DerivedAge version major/minor one nibble each
 * 23..22   3..1: Bits 21..20 & 7..0 = Script_Extensions index
 *             3: Script value from Script_Extensions
 *             2: Script=Inherited
 *             1: Script=Common
 *             0: Script=bits 21..20 & 7..0
 * 21..20   Bits 9..8 of the UScriptCode, or index to Script_Extensions
 * 19..17   East Asian Width
 * 16.. 8   UBlockCode
 *  7.. 0   UScriptCode, or index to Script_Extensions
 */

/* derived age: one nibble each for major and minor version numbers */
#define UPROPS_AGE_MASK         0xff000000
#define UPROPS_AGE_SHIFT        24

/* Script_Extensions: mask includes Script */
#define UPROPS_SCRIPT_X_MASK    0x00f000ff
#define UPROPS_SCRIPT_X_SHIFT   22

// The UScriptCode or Script_Extensions index is split across two bit fields.
// (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.)
// Shift the high bits right by 12 to assemble the full value.
#define UPROPS_SCRIPT_HIGH_MASK    0x00300000
#define UPROPS_SCRIPT_HIGH_SHIFT   12
#define UPROPS_MAX_SCRIPT          0x3ff

#define UPROPS_EA_MASK          0x000e0000
#define UPROPS_EA_SHIFT         17

#define UPROPS_BLOCK_MASK       0x0001ff00
#define UPROPS_BLOCK_SHIFT      8

#define UPROPS_SCRIPT_LOW_MASK  0x000000ff

/* UPROPS_SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
#define UPROPS_SCRIPT_X_WITH_COMMON     0x400000
#define UPROPS_SCRIPT_X_WITH_INHERITED  0x800000
#define UPROPS_SCRIPT_X_WITH_OTHER      0xc00000

#ifdef __cplusplus

namespace {

inline uint32_t uprops_mergeScriptCodeOrIndex(uint32_t scriptX) {
    return
        ((scriptX & UPROPS_SCRIPT_HIGH_MASK) >> UPROPS_SCRIPT_HIGH_SHIFT) |
        (scriptX & UPROPS_SCRIPT_LOW_MASK);
}

}  // namespace

#endif  // __cplusplus

/*
 * Properties in vector word 1
 * Each bit encodes one binary property.
 * The following constants represent the bit number, use 1<<UPROPS_XYZ.
 * UPROPS_BINARY_1_TOP<=32!
 *
 * Keep this list of property enums in sync with
 * propListNames[] in icu/source/tools/genprops/props2.c!
 *
 * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
 */
enum {
    UPROPS_WHITE_SPACE,
    UPROPS_DASH,
    UPROPS_HYPHEN,
    UPROPS_QUOTATION_MARK,
    UPROPS_TERMINAL_PUNCTUATION,
    UPROPS_MATH,
    UPROPS_HEX_DIGIT,
    UPROPS_ASCII_HEX_DIGIT,
    UPROPS_ALPHABETIC,
    UPROPS_IDEOGRAPHIC,
    UPROPS_DIACRITIC,
    UPROPS_EXTENDER,
    UPROPS_NONCHARACTER_CODE_POINT,
    UPROPS_GRAPHEME_EXTEND,
    UPROPS_GRAPHEME_LINK,
    UPROPS_IDS_BINARY_OPERATOR,
    UPROPS_IDS_TRINARY_OPERATOR,
    UPROPS_RADICAL,
    UPROPS_UNIFIED_IDEOGRAPH,
    UPROPS_DEFAULT_IGNORABLE_CODE_POINT,
    UPROPS_DEPRECATED,
    UPROPS_LOGICAL_ORDER_EXCEPTION,
    UPROPS_XID_START,
    UPROPS_XID_CONTINUE,
    UPROPS_ID_START,                            /* ICU 2.6, uprops format version 3.2 */
    UPROPS_ID_CONTINUE,
    UPROPS_GRAPHEME_BASE,
    UPROPS_S_TERM,                              /* new in ICU 3.0 and Unicode 4.0.1 */
    UPROPS_VARIATION_SELECTOR,
    UPROPS_PATTERN_SYNTAX,                      /* new in ICU 3.4 and Unicode 4.1 */
    UPROPS_PATTERN_WHITE_SPACE,
    UPROPS_PREPENDED_CONCATENATION_MARK,        // new in ICU 60 and Unicode 10
    UPROPS_BINARY_1_TOP                         /* ==32 - full! */
};

/*
 * Properties in vector word 2
 * Bits
 * 31..26   http://www.unicode.org/reports/tr51/#Emoji_Properties
 * 25..20   Line Break
 * 19..15   Sentence Break
 * 14..10   Word Break
 *  9.. 5   Grapheme Cluster Break
 *  4.. 0   Decomposition Type
 */
enum {
    UPROPS_2_EXTENDED_PICTOGRAPHIC=26,
    UPROPS_2_EMOJI_COMPONENT,
    UPROPS_2_EMOJI,
    UPROPS_2_EMOJI_PRESENTATION,
    UPROPS_2_EMOJI_MODIFIER,
    UPROPS_2_EMOJI_MODIFIER_BASE
};

#define UPROPS_LB_MASK          0x03f00000
#define UPROPS_LB_SHIFT         20

#define UPROPS_SB_MASK          0x000f8000
#define UPROPS_SB_SHIFT         15

#define UPROPS_WB_MASK          0x00007c00
#define UPROPS_WB_SHIFT         10

#define UPROPS_GCB_MASK         0x000003e0
#define UPROPS_GCB_SHIFT        5

#define UPROPS_DT_MASK          0x0000001f

/**
 * Gets the main properties value for a code point.
 * Implemented in uchar.c for uprops.cpp.
 */
U_CFUNC uint32_t
u_getMainProperties(UChar32 c);

/**
 * Get a properties vector word for a code point.
 * Implemented in uchar.c for uprops.cpp.
 * @return 0 if no data or illegal argument
 */
U_CFUNC uint32_t
u_getUnicodeProperties(UChar32 c, int32_t column);

/**
 * Get the the maximum values for some enum/int properties.
 * Use the same column numbers as for u_getUnicodeProperties().
 * The returned value will contain maximum values stored in the same bit fields
 * as where the enum values are stored in the u_getUnicodeProperties()
 * return values for the same columns.
 *
 * Valid columns are those for properties words that contain enumerated values.
 * (ICU 2.6: columns 0 and 2)
 * For other column numbers, this function will return 0.
 *
 * @internal
 */
U_CFUNC int32_t
uprv_getMaxValues(int32_t column);

/**
 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
 * @internal
 */
U_CFUNC UBool
u_isalnumPOSIX(UChar32 c);

/**
 * Checks if c is in
 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
 * with space=\p{Whitespace} and Control=Cc.
 * Implements UCHAR_POSIX_GRAPH.
 * @internal
 */
U_CFUNC UBool
u_isgraphPOSIX(UChar32 c);

/**
 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
 * Implements UCHAR_POSIX_PRINT.
 * @internal
 */
U_CFUNC UBool
u_isprintPOSIX(UChar32 c);

/** Turn a bit index into a bit flag. @internal */
#define UPROPS_FLAG(n) ((uint32_t)1<<(n))

/** Flags for general categories in the order of UCharCategory. @internal */
#define _Cn     UPROPS_FLAG(U_GENERAL_OTHER_TYPES)
#define _Lu     UPROPS_FLAG(U_UPPERCASE_LETTER)
#define _Ll     UPROPS_FLAG(U_LOWERCASE_LETTER)
#define _Lt     UPROPS_FLAG(U_TITLECASE_LETTER)
#define _Lm     UPROPS_FLAG(U_MODIFIER_LETTER)
/* #define _Lo     UPROPS_FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
#define _Mn     UPROPS_FLAG(U_NON_SPACING_MARK)
#define _Me     UPROPS_FLAG(U_ENCLOSING_MARK)
#define _Mc     UPROPS_FLAG(U_COMBINING_SPACING_MARK)
#define _Nd     UPROPS_FLAG(U_DECIMAL_DIGIT_NUMBER)
#define _Nl     UPROPS_FLAG(U_LETTER_NUMBER)
#define _No     UPROPS_FLAG(U_OTHER_NUMBER)
#define _Zs     UPROPS_FLAG(U_SPACE_SEPARATOR)
#define _Zl     UPROPS_FLAG(U_LINE_SEPARATOR)
#define _Zp     UPROPS_FLAG(U_PARAGRAPH_SEPARATOR)
#define _Cc     UPROPS_FLAG(U_CONTROL_CHAR)
#define _Cf     UPROPS_FLAG(U_FORMAT_CHAR)
#define _Co     UPROPS_FLAG(U_PRIVATE_USE_CHAR)
#define _Cs     UPROPS_FLAG(U_SURROGATE)
#define _Pd     UPROPS_FLAG(U_DASH_PUNCTUATION)
#define _Ps     UPROPS_FLAG(U_START_PUNCTUATION)
/* #define _Pe     UPROPS_FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
/* #define _Pc     UPROPS_FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
#define _Po     UPROPS_FLAG(U_OTHER_PUNCTUATION)
#define _Sm     UPROPS_FLAG(U_MATH_SYMBOL)
#define _Sc     UPROPS_FLAG(U_CURRENCY_SYMBOL)
#define _Sk     UPROPS_FLAG(U_MODIFIER_SYMBOL)
#define _So     UPROPS_FLAG(U_OTHER_SYMBOL)
#define _Pi     UPROPS_FLAG(U_INITIAL_PUNCTUATION)
/* #define _Pf     UPROPS_FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */

/** Some code points. @internal */
enum {
    uprops_TAB     =0x0009,
    uprops_LF      =0x000a,
    uprops_FF      =0x000c,
    uprops_CR      =0x000d,
    uprops_U_A     =0x0041,
    uprops_U_F     =0x0046,
    uprops_U_Z     =0x005a,
    uprops_U_a     =0x0061,
    uprops_U_f     =0x0066,
    uprops_U_z     =0x007a,
    uprops_DEL     =0x007f,
    uprops_NL      =0x0085,
    uprops_NBSP    =0x00a0,
    uprops_CGJ     =0x034f,
    uprops_FIGURESP=0x2007,
    uprops_HAIRSP  =0x200a,
    uprops_ZWNJ    =0x200c,
    uprops_ZWJ     =0x200d,
    uprops_RLM     =0x200f,
    uprops_NNBSP   =0x202f,
    uprops_WJ      =0x2060,
    uprops_INHSWAP =0x206a,
    uprops_NOMDIG  =0x206f,
    uprops_U_FW_A  =0xff21,
    uprops_U_FW_F  =0xff26,
    uprops_U_FW_Z  =0xff3a,
    uprops_U_FW_a  =0xff41,
    uprops_U_FW_f  =0xff46,
    uprops_U_FW_z  =0xff5a,
    uprops_ZWNBSP  =0xfeff
};

/**
 * Get the maximum length of a (regular/1.0/extended) character name.
 * @return 0 if no character names available.
 */
U_CAPI int32_t U_EXPORT2
uprv_getMaxCharNameLength(void);

/**
 * Fills set with characters that are used in Unicode character names.
 * Includes all characters that are used in regular/Unicode 1.0/extended names.
 * Just empties the set if no character names are available.
 * @param sa USetAdder to receive characters.
 */
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(const USetAdder *sa);

/**
 * Constants for which data and implementation files provide which properties.
 * Used by UnicodeSet for service-specific property enumeration.
 * @internal
 */
enum UPropertySource {
    /** No source, not a supported property. */
    UPROPS_SRC_NONE,
    /** From uchar.c/uprops.icu main trie */
    UPROPS_SRC_CHAR,
    /** From uchar.c/uprops.icu properties vectors trie */
    UPROPS_SRC_PROPSVEC,
    /** From unames.c/unames.icu */
    UPROPS_SRC_NAMES,
    /** From ucase.c/ucase.icu */
    UPROPS_SRC_CASE,
    /** From ubidi_props.c/ubidi.icu */
    UPROPS_SRC_BIDI,
    /** From uchar.c/uprops.icu main trie as well as properties vectors trie */
    UPROPS_SRC_CHAR_AND_PROPSVEC,
    /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */
    UPROPS_SRC_CASE_AND_NORM,
    /** From normalizer2impl.cpp/nfc.nrm */
    UPROPS_SRC_NFC,
    /** From normalizer2impl.cpp/nfkc.nrm */
    UPROPS_SRC_NFKC,
    /** From normalizer2impl.cpp/nfkc_cf.nrm */
    UPROPS_SRC_NFKC_CF,
    /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
    UPROPS_SRC_NFC_CANON_ITER,
    // Text layout properties.
    UPROPS_SRC_INPC,
    UPROPS_SRC_INSC,
    UPROPS_SRC_VO,
    /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
    UPROPS_SRC_COUNT
};
typedef enum UPropertySource UPropertySource;

/**
 * @see UPropertySource
 * @internal
 */
U_CFUNC UPropertySource U_EXPORT2
uprops_getSource(UProperty which);

/**
 * Enumerate uprops.icu's main data trie and add the
 * start of each range of same properties to the set.
 * @internal
 */
U_CFUNC void U_EXPORT2
uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

/**
 * Enumerate uprops.icu's properties vectors trie and add the
 * start of each range of same properties to the set.
 * @internal
 */
U_CFUNC void U_EXPORT2
upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

U_CFUNC void U_EXPORT2
uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode);

/**
 * Return a set of characters for property enumeration.
 * For each two consecutive characters (start, limit) in the set,
 * all of the properties for start..limit-1 are all the same.
 *
 * @param sa USetAdder to receive result. Existing contents are lost.
 * @internal
 */
/*U_CFUNC void U_EXPORT2
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
*/

/**
 * Swap the ICU Unicode character names file. See uchar.c.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode);

#ifdef __cplusplus

U_NAMESPACE_BEGIN

class UnicodeSet;

class CharacterProperties {
public:
    CharacterProperties() = delete;
    static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
};

// implemented in uniset_props.cpp
U_CFUNC UnicodeSet *
uniset_getUnicode32Instance(UErrorCode &errorCode);

U_NAMESPACE_END

#endif



using icu::LocalPointer;
#if !UCONFIG_NO_NORMALIZATION
using icu::Normalizer2Factory;
using icu::Normalizer2Impl;
#endif
using icu::UInitOnce;
using icu::UnicodeSet;

namespace {

UBool U_CALLCONV characterproperties_cleanup();

constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;

struct Inclusion {
    UnicodeSet  *fSet = nullptr;
    UInitOnce    fInitOnce = U_INITONCE_INITIALIZER;
};
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()

UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};

UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};

icu::UMutex cpMutex;

//----------------------------------------------------------------
// Inclusions list
//----------------------------------------------------------------

// USetAdder implementation
// Does not use uset.h to reduce code dependencies
void U_CALLCONV
_set_add(USet *set, UChar32 c) {
    ((UnicodeSet *)set)->add(c);
}

void U_CALLCONV
_set_addRange(USet *set, UChar32 start, UChar32 end) {
    ((UnicodeSet *)set)->add(start, end);
}

void U_CALLCONV
_set_addString(USet *set, const UChar *str, int32_t length) {
    ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
}

UBool U_CALLCONV characterproperties_cleanup() {
    for (Inclusion &in: gInclusions) {
        delete in.fSet;
        in.fSet = nullptr;
        in.fInitOnce.reset();
    }
    for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) {
        delete sets[i];
        sets[i] = nullptr;
    }
    for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) {
        ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i]));
        maps[i] = nullptr;
    }
    return TRUE;
}

void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    U_ASSERT(gInclusions[src].fSet == nullptr);

    LocalPointer<UnicodeSet> incl(new UnicodeSet());
    if (incl.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    USetAdder sa = {
        (USet *)incl.getAlias(),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr, // don't need remove()
        nullptr // don't need removeRange()
    };

    switch(src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&sa, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&sa, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&sa, &errorCode);
        upropsvec_addPropertyStarts(&sa, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
        if(U_SUCCESS(errorCode)) {
            impl->addPropertyStarts(&sa, errorCode);
        }
        ucase_addPropertyStarts(&sa, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC: {
        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
        if(U_SUCCESS(errorCode)) {
            impl->addPropertyStarts(&sa, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC: {
        const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode);
        if(U_SUCCESS(errorCode)) {
            impl->addPropertyStarts(&sa, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFKC_CF: {
        const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode);
        if(U_SUCCESS(errorCode)) {
            impl->addPropertyStarts(&sa, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
        if(U_SUCCESS(errorCode)) {
            impl->addCanonIterPropertyStarts(&sa, errorCode);
        }
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&sa, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&sa, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_FAILURE(errorCode)) {
        return;
    }
    if (incl->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    incl->compact();
    gInclusions[src].fSet = incl.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    if (src < 0 || UPROPS_SRC_COUNT <= src) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &i = gInclusions[src];
    umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
    return i.fSet;
}

void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
    int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
    U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
    UPropertySource src = uprops_getSource(prop);
    const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
    if (intPropIncl.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    int32_t numRanges = incl->getRangeCount();
    int32_t prevValue = 0;
    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = incl->getRangeEnd(i);
        for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            int32_t value = u_getIntPropertyValue(c, prop);
            if (value != prevValue) {
                intPropIncl->add(c);
                prevValue = value;
            }
        }
    }

    if (intPropIncl->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    intPropIncl->compact();
    gInclusions[inclIndex].fSet = intPropIncl.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

}  // namespace

U_NAMESPACE_BEGIN

const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
        int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
        Inclusion &i = gInclusions[inclIndex];
        umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
        return i.fSet;
    } else {
        UPropertySource src = uprops_getSource(prop);
        return getInclusionsForSource(src, errorCode);
    }
}

U_NAMESPACE_END

namespace {

UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    LocalPointer<UnicodeSet> set(new UnicodeSet());
    if (set.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 startHasProperty = -1;

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
            if (u_hasBinaryProperty(c, property)) {
                if (startHasProperty < 0) {
                    // Transition from false to true.
                    startHasProperty = c;
                }
            } else if (startHasProperty >= 0) {
                // Transition from true to false.
                set->add(startHasProperty, c - 1);
                startHasProperty = -1;
            }
        }
    }
    if (startHasProperty >= 0) {
        set->add(startHasProperty, 0x10FFFF);
    }
    set->freeze();
    return set.orphan();
}

UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;
    uint32_t value = nullValue;

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    if (value != 0) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}

}  // namespace

U_NAMESPACE_USE

U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex m(&cpMutex);
    UnicodeSet *set = sets[property];
    if (set == nullptr) {
        sets[property] = set = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    return set->toUSet();
}

U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex m(&cpMutex);
    UCPMap *map = maps[property - UCHAR_INT_START];
    if (map == nullptr) {
        maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode);
    }
    return map;
}
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************
*
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
********************************************************************
*/




#if U_SHOW_CPLUSPLUS_API



/**
 * \file
 * \brief C++ API: Character Iterator
 */
 
U_NAMESPACE_BEGIN
/**
 * Abstract class that defines an API for forward-only iteration
 * on text objects.
 * This is a minimal interface for iteration without random access
 * or backwards iteration. It is especially useful for wrapping
 * streams with converters into an object for collation or
 * normalization.
 *
 * <p>Characters can be accessed in two ways: as code units or as
 * code points.
 * Unicode code points are 21-bit integers and are the scalar values
 * of Unicode characters. ICU uses the type UChar32 for them.
 * Unicode code units are the storage units of a given
 * Unicode/UCS Transformation Format (a character encoding scheme).
 * With UTF-16, all code points can be represented with either one
 * or two code units ("surrogates").
 * String storage is typically based on code units, while properties
 * of characters are typically determined using code point values.
 * Some processes may be designed to work with sequences of code units,
 * or it may be known that all characters that are important to an
 * algorithm can be represented with single code units.
 * Other processes will need to use the code point access functions.</p>
 *
 * <p>ForwardCharacterIterator provides nextPostInc() to access
 * a code unit and advance an internal position into the text object,
 * similar to a <code>return text[position++]</code>.<br>
 * It provides next32PostInc() to access a code point and advance an internal
 * position.</p>
 *
 * <p>next32PostInc() assumes that the current position is that of
 * the beginning of a code point, i.e., of its first code unit.
 * After next32PostInc(), this will be true again.
 * In general, access to code units and code points in the same
 * iteration loop should not be mixed. In UTF-16, if the current position
 * is on a second code unit (Low Surrogate), then only that code unit
 * is returned even by next32PostInc().</p>
 *
 * <p>For iteration with either function, there are two ways to
 * check for the end of the iteration. When there are no more
 * characters in the text object:
 * <ul>
 * <li>The hasNext() function returns FALSE.</li>
 * <li>nextPostInc() and next32PostInc() return DONE
 *     when one attempts to read beyond the end of the text object.</li>
 * </ul>
 *
 * Example:
 * \code 
 * void function1(ForwardCharacterIterator &it) {
 *     UChar32 c;
 *     while(it.hasNext()) {
 *         c=it.next32PostInc();
 *         // use c
 *     }
 * }
 *
 * void function1(ForwardCharacterIterator &it) {
 *     char16_t c;
 *     while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
 *         // use c
 *      }
 *  }
 * \endcode
 * </p>
 *
 * @stable ICU 2.0
 */
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Value returned by most of ForwardCharacterIterator's functions
     * when the iterator has reached the limits of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };
    
    /**
     * Destructor.  
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();
    
    /**
     * Returns true when both iterators refer to the same
     * character in the same character-storage object.  
     * @param that The ForwardCharacterIterator to be compared for equality
     * @return true when both iterators refer to the same
     * character in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
    
    /**
     * Returns true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object.  
     * @param that The ForwardCharacterIterator to be compared for inequality
     * @return true when the iterators refer to different
     * text-storage objects, or to different characters in the
     * same text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator& that) const;
    
    /**
     * Generates a hash code for this iterator.  
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode(void) const = 0;
    
    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Despite the fact that this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! 
     * @return a UClassID for this ForwardCharacterIterator 
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID(void) const = 0;
    
    /**
     * Gets the current code unit for returning and advances to the next code unit
     * in the iteration range
     * (toward endIndex()).  If there are
     * no more code units to return, returns DONE.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t         nextPostInc(void) = 0;
    
    /**
     * Gets the current code point for returning and advances to the next code point
     * in the iteration range
     * (toward endIndex()).  If there are
     * no more code points to return, returns DONE.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32       next32PostInc(void) = 0;
    
    /**
     * Returns FALSE if there are no more code units or code points
     * at or after the current position in the iteration range.
     * This is used with nextPostInc() or next32PostInc() in forward
     * iteration.
     * @returns FALSE if there are no more code units or code points
     * at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool        hasNext() = 0;
    
protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator();
    
    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
    ForwardCharacterIterator(const ForwardCharacterIterator &other);
    
    /**
     * Assignment operator to be overridden in the implementing class.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};

/**
 * Abstract class that defines an API for iteration
 * on text objects.
 * This is an interface for forward and backward iteration
 * and random access into a text object.
 *
 * <p>The API provides backward compatibility to the Java and older ICU
 * CharacterIterator classes but extends them significantly:
 * <ol>
 * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
 * <li>While the old API functions provided forward iteration with
 *     "pre-increment" semantics, the new one also provides functions
 *     with "post-increment" semantics. They are more efficient and should
 *     be the preferred iterator functions for new implementations.
 *     The backward iteration always had "pre-decrement" semantics, which
 *     are efficient.</li>
 * <li>Just like ForwardCharacterIterator, it provides access to
 *     both code units and code points. Code point access versions are available
 *     for the old and the new iteration semantics.</li>
 * <li>There are new functions for setting and moving the current position
 *     without returning a character, for efficiency.</li>
 * </ol>
 *
 * See ForwardCharacterIterator for examples for using the new forward iteration
 * functions. For backward iteration, there is also a hasPrevious() function
 * that can be used analogously to hasNext().
 * The old functions work as before and are shown below.</p>
 *
 * <p>Examples for some of the new functions:</p>
 *
 * Forward iteration with hasNext():
 * \code
 * void forward1(CharacterIterator &it) {
 *     UChar32 c;
 *     for(it.setToStart(); it.hasNext();) {
 *         c=it.next32PostInc();
 *         // use c
 *     }
 *  }
 * \endcode
 * Forward iteration more similar to loops with the old forward iteration,
 * showing a way to convert simple for() loops:
 * \code
 * void forward2(CharacterIterator &it) {
 *     char16_t c;
 *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
 *          // use c
 *      }
 * }
 * \endcode
 * Backward iteration with setToEnd() and hasPrevious():
 * \code
 *  void backward1(CharacterIterator &it) {
 *      UChar32 c;
 *      for(it.setToEnd(); it.hasPrevious();) {
 *         c=it.previous32();
 *          // use c
 *      }
 *  }
 * \endcode
 * Backward iteration with a more traditional for() loop:
 * \code
 * void backward2(CharacterIterator &it) {
 *     char16_t c;
 *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
 *         // use c
 *      }
 *  }
 * \endcode
 *
 * Example for random access:
 * \code
 *  void random(CharacterIterator &it) {
 *      // set to the third code point from the beginning
 *      it.move32(3, CharacterIterator::kStart);
 *      // get a code point from here without moving the position
 *      UChar32 c=it.current32();
 *      // get the position
 *      int32_t pos=it.getIndex();
 *      // get the previous code unit
 *      char16_t u=it.previous();
 *      // move back one more code unit
 *      it.move(-1, CharacterIterator::kCurrent);
 *      // set the position back to where it was
 *      // and read the same code point c and move beyond it
 *      it.setIndex(pos);
 *      if(c!=it.next32PostInc()) {
 *          exit(1); // CharacterIterator inconsistent
 *      }
 *  }
 * \endcode
 *
 * <p>Examples, especially for the old API:</p>
 *
 * Function processing characters, in this example simple output
 * <pre>
 * \code
 *  void processChar( char16_t c )
 *  {
 *      cout << " " << c;
 *  }
 * \endcode
 * </pre>
 * Traverse the text from start to finish
 * <pre> 
 * \code
 *  void traverseForward(CharacterIterator& iter)
 *  {
 *      for(char16_t c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
 *          processChar(c);
 *      }
 *  }
 * \endcode
 * </pre>
 * Traverse the text backwards, from end to start
 * <pre>
 * \code
 *  void traverseBackward(CharacterIterator& iter)
 *  {
 *      for(char16_t c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
 *          processChar(c);
 *      }
 *  }
 * \endcode
 * </pre>
 * Traverse both forward and backward from a given position in the text. 
 * Calls to notBoundary() in this example represents some additional stopping criteria.
 * <pre>
 * \code
 * void traverseOut(CharacterIterator& iter, int32_t pos)
 * {
 *      char16_t c;
 *      for (c = iter.setIndex(pos);
 *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
 *          c = iter.next()) {}
 *      int32_t end = iter.getIndex();
 *      for (c = iter.setIndex(pos);
 *          c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
 *          c = iter.previous()) {}
 *      int32_t start = iter.getIndex() + 1;
 *  
 *      cout << "start: " << start << " end: " << end << endl;
 *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
 *          processChar(c);
 *     }
 *  }
 * \endcode
 * </pre>
 * Creating a StringCharacterIterator and calling the test functions
 * <pre>
 * \code
 *  void CharacterIterator_Example( void )
 *   {
 *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
 *       UnicodeString text("Ein kleiner Satz.");
 *       StringCharacterIterator iterator(text);
 *       cout << "----- traverseForward: -----------" << endl;
 *       traverseForward( iterator );
 *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
 *       traverseBackward( iterator );
 *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
 *       traverseOut( iterator, 7 );
 *       cout << endl << endl << "-----" << endl;
 *   }
 * \endcode
 * </pre>
 *
 * @stable ICU 2.0
 */
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Origin enumeration for the move() and move32() functions.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Returns a pointer to a new CharacterIterator of the same
     * concrete class as this one, and referring to the same
     * character in the same text-storage object as this one.  The
     * caller is responsible for deleting the new clone.  
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator* clone() const = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t         first(void) = 0;

    /**
     * Sets the iterator to refer to the first code unit in its
     * iteration range, returns that code unit, and moves the position
     * to the second code unit. This is an alternative to setToStart()
     * for forward iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t         firstPostInc(void);

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, and returns that code unit,
     * This can be used to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32       first32(void) = 0;

    /**
     * Sets the iterator to refer to the first code point in its
     * iteration range, returns that code point, and moves the position
     * to the second code point. This is an alternative to setToStart()
     * for forward iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32       first32PostInc(void);

    /**
     * Sets the iterator to refer to the first code unit or code point in its
     * iteration range. This can be used to begin a forward
     * iteration with nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t    setToStart();

    /**
     * Sets the iterator to refer to the last code unit in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual char16_t         last(void) = 0;
        
    /**
     * Sets the iterator to refer to the last code point in its
     * iteration range, and returns that code unit.
     * This can be used to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32       last32(void) = 0;

    /**
     * Sets the iterator to the end of its iteration range, just behind
     * the last code unit or code point. This can be used to begin a backward
     * iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t    setToEnd();

    /**
     * Sets the iterator to refer to the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code unit.  
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual char16_t         setIndex(int32_t position) = 0;

    /**
     * Sets the iterator to refer to the beginning of the code point
     * that contains the "position"-th code unit
     * in the text-storage object the iterator refers to, and
     * returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code point.
     * @stable ICU 2.0
     */
    virtual UChar32       setIndex32(int32_t position) = 0;

    /**
     * Returns the code unit the iterator currently refers to. 
     * @return the current code unit. 
     * @stable ICU 2.0
     */
    virtual char16_t         current(void) const = 0;
        
    /**
     * Returns the code point the iterator currently refers to.  
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32       current32(void) const = 0;
        
    /**
     * Advances to the next code unit in the iteration range
     * (toward endIndex()), and returns that code unit.  If there are
     * no more code units to return, returns DONE.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual char16_t         next(void) = 0;
        
    /**
     * Advances to the next code point in the iteration range
     * (toward endIndex()), and returns that code point.  If there are
     * no more code points to return, returns DONE.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32       next32(void) = 0;
        
    /**
     * Advances to the previous code unit in the iteration range
     * (toward startIndex()), and returns that code unit.  If there are
     * no more code units to return, returns DONE.  
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual char16_t         previous(void) = 0;

    /**
     * Advances to the previous code point in the iteration range
     * (toward startIndex()), and returns that code point.  If there are
     * no more code points to return, returns DONE. 
     * @return the previous code point. 
     * @stable ICU 2.0
     */
    virtual UChar32       previous32(void) = 0;

    /**
     * Returns FALSE if there are no more code units or code points
     * before the current position in the iteration range.
     * This is used with previous() or previous32() in backward
     * iteration.
     * @return FALSE if there are no more code units or code points
     * before the current position in the iteration range, return TRUE otherwise.
     * @stable ICU 2.0
     */
    virtual UBool        hasPrevious() = 0;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character returned by first().  Since it's
     * possible to create an iterator that iterates across only
     * part of a text-storage object, this number isn't
     * necessarily 0.  
     * @returns the numeric index in the underlying text-storage
     * object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t       startIndex(void) const;
        
    /**
     * Returns the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().  
     * @return the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @stable ICU 2.0
     */
    inline int32_t       endIndex(void) const;
        
    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).  
     * @return the numeric index in the text-storage object of 
     * the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t       getIndex(void) const;

    /**
     * Returns the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t           getLength() const;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code units forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t      move(int32_t delta, EOrigin origin) = 0;

    /**
     * Moves the current position relative to the start or end of the
     * iteration range, or relative to the current position itself.
     * The movement is expressed in numbers of code points forward
     * or backward by specifying a positive or negative delta.
     * @param delta the position relative to origin. A positive delta means forward;
     * a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
#ifdef move32
     // One of the system headers right now is sometimes defining a conflicting macro we don't use
#undef move32
#endif
    virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".  
     * @param result Receives a copy of the text under iteration.  
     * @stable ICU 2.0
     */
    virtual void            getText(UnicodeString&  result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor, just setting the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor, just setting the length and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor, just setting the length, start, end, and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
  
    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator.  Sets this CharacterIterator to have the same behavior,
     * as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * Necessary this for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t  pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t  begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t  end;
};

inline UBool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
    return !operator==(that);
}

inline int32_t
CharacterIterator::setToStart() {
    return move(0, kStart);
}

inline int32_t
CharacterIterator::setToEnd() {
    return move(0, kEnd);
}

inline int32_t
CharacterIterator::startIndex(void) const {
    return begin;
}

inline int32_t
CharacterIterator::endIndex(void) const {
    return end;
}

inline int32_t
CharacterIterator::getIndex(void) const {
    return pos;
}

inline int32_t
CharacterIterator::getLength(void) const {
    return textLength;
}

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */



U_NAMESPACE_BEGIN

ForwardCharacterIterator::~ForwardCharacterIterator() {}
ForwardCharacterIterator::ForwardCharacterIterator()
: UObject()
{}
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
: UObject(other)
{}


CharacterIterator::CharacterIterator()
: textLength(0), pos(0), begin(0), end(0) {
}

CharacterIterator::CharacterIterator(int32_t length)
: textLength(length), pos(0), begin(0), end(length) {
    if(textLength < 0) {
        textLength = end = 0;
    }
}

CharacterIterator::CharacterIterator(int32_t length, int32_t position)
: textLength(length), pos(position), begin(0), end(length) {
    if(textLength < 0) {
        textLength = end = 0;
    }
    if(pos < 0) {
        pos = 0;
    } else if(pos > end) {
        pos = end;
    }
}

CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
: textLength(length), pos(position), begin(textBegin), end(textEnd) {
    if(textLength < 0) {
        textLength = 0;
    }
    if(begin < 0) {
        begin = 0;
    } else if(begin > textLength) {
        begin = textLength;
    }
    if(end < begin) {
        end = begin;
    } else if(end > textLength) {
        end = textLength;
    }
    if(pos < begin) {
        pos = begin;
    } else if(pos > end) {
        pos = end;
    }
}

CharacterIterator::~CharacterIterator() {}

CharacterIterator::CharacterIterator(const CharacterIterator &that) :
ForwardCharacterIterator(that),
textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end)
{
}

CharacterIterator &
CharacterIterator::operator=(const CharacterIterator &that) {
    ForwardCharacterIterator::operator=(that);
    textLength = that.textLength;
    pos = that.pos;
    begin = that.begin;
    end = that.end;
    return *this;
}

// implementing first[32]PostInc() directly in a subclass should be faster
// but these implementations make subclassing a little easier
UChar
CharacterIterator::firstPostInc(void) {
    setToStart();
    return nextPostInc();
}

UChar32
CharacterIterator::first32PostInc(void) {
    setToStart();
    return next32PostInc();
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  charstr.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010may19
*   created by: Markus W. Scherer
*/





// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.H
*
* Contains CString interface
*
* @author       Helena Shih
*
* Modification History:
*
*   Date        Name        Description
*   6/17/98     hshih       Created.
*  05/03/99     stephen     Changed from functions to macros.
*  06/14/99     stephen     Added icu_strncat, icu_strncmp, icu_tolower
*
******************************************************************************
*/




#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE  strcpy(dst, src)
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)

/**
 * Is c an ASCII-repertoire letter a-z or A-Z?
 * Note: The implementation is specific to whether ICU is compiled for
 * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
 */
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c);

U_CAPI char U_EXPORT2
uprv_toupper(char c);


U_CAPI char U_EXPORT2
uprv_asciitolower(char c);

U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c);

#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_tolower uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_tolower uprv_ebcdictolower
#else
#   error U_CHARSET_FAMILY is not valid
#endif

#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)

/* Conversion from a digit to the character with radix base from 2-19 */
/* May need to use U_UPPER_ORDINAL*/
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))

U_CAPI char* U_EXPORT2
uprv_strdup(const char *src);

/**
 * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
 * Terminate with a null at offset n.   If n is -1, works like uprv_strdup
 * @param src
 * @param n length of the input string, not including null.
 * @return new string (owned by caller, use uprv_free to free).
 * @internal
 */
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n);

U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str);

U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str);

U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);

U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);

U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix);

/**
 * Case-insensitive, language-independent string comparison
 * limited to the ASCII character repertoire.
 */
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2);

/**
 * Case-insensitive, language-independent string comparison
 * limited to the ASCII character repertoire.
 */
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n);


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uinvchar.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004sep14
*   created by: Markus W. Scherer
*
*   Definitions for handling invariant characters, moved here from putil.c
*   for better modularization.
*/



#ifdef __cplusplus

#endif

/**
 * Check if a char string only contains invariant characters.
 * See utypes.h for details.
 *
 * @param s Input string pointer.
 * @param length Length of the string, can be -1 if NUL-terminated.
 * @return TRUE if s contains only invariant characters.
 *
 * @internal (ICU 2.8)
 */
U_INTERNAL UBool U_EXPORT2
uprv_isInvariantString(const char *s, int32_t length);

/**
 * Check if a Unicode string only contains invariant characters.
 * See utypes.h for details.
 *
 * @param s Input string pointer.
 * @param length Length of the string, can be -1 if NUL-terminated.
 * @return TRUE if s contains only invariant characters.
 *
 * @internal (ICU 2.8)
 */
U_INTERNAL UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length);

/**
 * \def U_UPPER_ORDINAL
 * Get the ordinal number of an uppercase invariant character
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define U_UPPER_ORDINAL(x) ((x)-'A')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
                              (((x) < 'S') ? ((x)-'J'+9) : \
                               ((x)-'S'+18)))
#else
#   error Unknown charset family!
#endif

#ifdef __cplusplus

U_NAMESPACE_BEGIN

/**
 * Like U_UPPER_ORDINAL(x) but with validation.
 * Returns 0..25 for A..Z else a value outside 0..25.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'I') { return c - 'A'; }  // A-I --> 0-8
    if (c < 'J') { return -1; }
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
    if (c < 'S') { return -1; }
    return c - 'S' + 18;  // S-Z --> 18..25
#else
#   error Unknown charset family!
#endif
}

// Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
// Returns 0..25 for a..z else a value outside 0..25.
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'i') { return c - 'a'; }  // a-i --> 0-8
    if (c < 'j') { return -1; }
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    if (c < 's') { return -1; }
    return c - 's' + 18;  // s-z --> 18..25
#else
#   error Unknown charset family!
#endif
}

U_NAMESPACE_END

#endif

/**
 * Returns true if c == '@' is possible.
 * The @ sign is variant, and the @ sign used on one
 * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
 * @internal
 */
U_CFUNC UBool
uprv_isEbcdicAtSign(char c);

/**
 * \def uprv_isAtSign
 * Returns true if c == '@' is possible.
 * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_isAtSign(c) ((c)=='@')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
#else
#   error Unknown charset family!
#endif

/**
 * Compare two EBCDIC invariant-character strings in ASCII order.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);

/**
 * \def uprv_compareInvCharsAsAscii
 * Compare two invariant-character strings in ASCII order.
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
#else
#   error Unknown charset family!
#endif

/**
 * Converts an EBCDIC invariant character to ASCII.
 * @internal
 */
U_INTERNAL char U_EXPORT2
uprv_ebcdicToAscii(char c);

/**
 * \def uprv_invCharToAscii
 * Converts an invariant character to ASCII.
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToAscii(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
#else
#   error Unknown charset family!
#endif

/**
 * Converts an EBCDIC invariant character to lowercase ASCII.
 * @internal
 */
U_INTERNAL char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c);

/**
 * \def uprv_invCharToLowercaseAscii
 * Converts an invariant character to lowercase ASCII.
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
#else
#   error Unknown charset family!
#endif

/**
 * Copy EBCDIC to ASCII
 * @internal
 * @see uprv_strncpy
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);


/**
 * Copy ASCII to EBCDIC
 * @internal
 * @see uprv_strncpy
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);





U_NAMESPACE_BEGIN

CharString::CharString(CharString&& src) U_NOEXCEPT
        : buffer(std::move(src.buffer)), len(src.len) {
    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
}

CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
    buffer = std::move(src.buffer);
    len = src.len;
    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
    return *this;
}

char *CharString::cloneData(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    char *p = static_cast<char *>(uprv_malloc(len + 1));
    if (p == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(p, buffer.getAlias(), len + 1);
    return p;
}

CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
        len=s.len;
        uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
    }
    return *this;
}

int32_t CharString::lastIndexOf(char c) const {
    for(int32_t i=len; i>0;) {
        if(buffer[--i]==c) {
            return i;
        }
    }
    return -1;
}

bool CharString::contains(StringPiece s) const {
    if (s.empty()) { return false; }
    const char *p = buffer.getAlias();
    int32_t lastStart = len - s.length();
    for (int32_t i = 0; i <= lastStart; ++i) {
        if (uprv_memcmp(p + i, s.data(), s.length()) == 0) {
            return true;
        }
    }
    return false;
}

CharString &CharString::truncate(int32_t newLength) {
    if(newLength<0) {
        newLength=0;
    }
    if(newLength<len) {
        buffer[len=newLength]=0;
    }
    return *this;
}

CharString &CharString::append(char c, UErrorCode &errorCode) {
    if(ensureCapacity(len+2, 0, errorCode)) {
        buffer[len++]=c;
        buffer[len]=0;
    }
    return *this;
}

CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(sLength<-1 || (s==NULL && sLength!=0)) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if(sLength<0) {
        sLength= static_cast<int32_t>(uprv_strlen(s));
    }
    if(sLength>0) {
        if(s==(buffer.getAlias()+len)) {
            // The caller wrote into the getAppendBuffer().
            if(sLength>=(buffer.getCapacity()-len)) {
                // The caller wrote too much.
                errorCode=U_INTERNAL_PROGRAM_ERROR;
            } else {
                buffer[len+=sLength]=0;
            }
        } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) &&
                  sLength>=(buffer.getCapacity()-len)
        ) {
            // (Part of) this string is appended to itself which requires reallocation,
            // so we have to make a copy of the substring and append that.
            return append(CharString(s, sLength, errorCode), errorCode);
        } else if(ensureCapacity(len+sLength+1, 0, errorCode)) {
            uprv_memcpy(buffer.getAlias()+len, s, sLength);
            buffer[len+=sLength]=0;
        }
    }
    return *this;
}

char *CharString::getAppendBuffer(int32_t minCapacity,
                                  int32_t desiredCapacityHint,
                                  int32_t &resultCapacity,
                                  UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        resultCapacity=0;
        return NULL;
    }
    int32_t appendCapacity=buffer.getCapacity()-len-1;  // -1 for NUL
    if(appendCapacity>=minCapacity) {
        resultCapacity=appendCapacity;
        return buffer.getAlias()+len;
    }
    if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
        resultCapacity=buffer.getCapacity()-len-1;
        return buffer.getAlias()+len;
    }
    resultCapacity=0;
    return NULL;
}

CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
    return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
}

CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if (!uprv_isInvariantUString(uchars, ucharsLen)) {
        errorCode = U_INVARIANT_CONVERSION_ERROR;
        return *this;
    }
    if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
        u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen);
        len += ucharsLen;
        buffer[len] = 0;
    }
    return *this;
}

UBool CharString::ensureCapacity(int32_t capacity,
                                 int32_t desiredCapacityHint,
                                 UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    if(capacity>buffer.getCapacity()) {
        if(desiredCapacityHint==0) {
            desiredCapacityHint=capacity+buffer.getCapacity();
        }
        if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) &&
            buffer.resize(capacity, len+1)==NULL
        ) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return FALSE;
        }
    }
    return TRUE;
}

CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(s.length()==0) {
        return *this;
    }
    char c;
    if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
        append(U_FILE_SEP_CHAR, errorCode);
    }
    append(s, errorCode);
    return *this;
}

CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
    char c;
    if(U_SUCCESS(errorCode) && len>0 &&
            (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
        append(U_FILE_SEP_CHAR, errorCode);
    }
    return *this;
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2002-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File cmemory.c      ICU Heap allocation.
*                     All ICU heap allocation, both for C and C++ new of ICU
*                     class types, comes through these functions.
*
*                     If you have a need to replace ICU allocation, this is the
*                     place to do it.
*
*                     Note that uprv_malloc(0) returns a non-NULL pointer, and
*                     that a subsequent free of that pointer value is a NOP.
*
******************************************************************************
*/




#include <stdlib.h>

/* uprv_malloc(0) returns a pointer to this read-only data. */
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};

/* Function Pointers for user-supplied heap functions  */
static const void     *pContext;
static UMemAllocFn    *pAlloc;
static UMemReallocFn  *pRealloc;
static UMemFreeFn     *pFree;

#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
static int n=0;
static long b=0; 
#endif

U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#if 1
  putchar('>');
  fflush(stdout);
#else
  fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
#endif
#endif
    if (s > 0) {
        if (pAlloc) {
            return (*pAlloc)(pContext, s);
        } else {
            return uprv_default_malloc(s);
        }
    } else {
        return (void *)zeroMem;
    }
}

U_CAPI void * U_EXPORT2
uprv_realloc(void * buffer, size_t size) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
  putchar('~');
  fflush(stdout);
#endif
    if (buffer == zeroMem) {
        return uprv_malloc(size);
    } else if (size == 0) {
        if (pFree) {
            (*pFree)(pContext, buffer);
        } else {
            uprv_default_free(buffer);
        }
        return (void *)zeroMem;
    } else {
        if (pRealloc) {
            return (*pRealloc)(pContext, buffer, size);
        } else {
            return uprv_default_realloc(buffer, size);
        }
    }
}

U_CAPI void U_EXPORT2
uprv_free(void *buffer) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
  putchar('<');
  fflush(stdout);
#endif
    if (buffer != zeroMem) {
        if (pFree) {
            (*pFree)(pContext, buffer);
        } else {
            uprv_default_free(buffer);
        }
    }
}

U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) {
    void *mem = NULL;
    size *= num;
    mem = uprv_malloc(size);
    if (mem) {
        uprv_memset(mem, 0, size);
    }
    return mem;
}

U_CAPI void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f,  UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }
    if (a==NULL || r==NULL || f==NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    pContext  = context;
    pAlloc    = a;
    pRealloc  = r;
    pFree     = f;
}


U_CFUNC UBool cmemory_cleanup(void) {
    pContext   = NULL;
    pAlloc     = NULL;
    pRealloc   = NULL;
    pFree      = NULL;
    return TRUE;
}
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2015-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  charstr.cpp
*/




// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File: cstr.h
*/








/**
 * ICU-internal class CStr, a small helper class to facilitate passing UnicodeStrings
 * to functions needing (const char *) strings, such as printf().
 *
 * It is intended primarily for use in debugging or in tests. Uses platform 
 * default code page conversion, which will do the best job possible,
 * but may be lossy, depending on the platform.
 *
 * If no other conversion is available, use invariant conversion and substitue
 * '?' for non-invariant characters.
 *
 * Example Usage:
 *   UnicodeString s = whatever;
 *   printf("%s", CStr(s)());
 *
 *   The explicit call to the CStr() constructor creates a temporary object.
 *   Operator () on the temporary object returns a (const char *) pointer.
 *   The lifetime of the (const char *) data is that of the temporary object,
 *   which works well when passing it as a parameter to another function, such as printf.
 */

U_NAMESPACE_BEGIN

class U_COMMON_API CStr : public UMemory {
  public:
    CStr(const UnicodeString &in);
    ~CStr();
    const char * operator ()() const;

  private:
    CharString s;
    CStr(const CStr &other);               //  Forbid copying of this class.
    CStr &operator =(const CStr &other);   //  Forbid assignment.
};

U_NAMESPACE_END






U_NAMESPACE_BEGIN

CStr::CStr(const UnicodeString &in) {
    UErrorCode status = U_ZERO_ERROR;
#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
    int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
    int32_t resultCapacity = 0;
    char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
    if (U_SUCCESS(status)) {
        in.extract(0, in.length(), buf, resultCapacity);
        s.append(buf, length, status);
    }
#else
    // No conversion available. Convert any invariant characters; substitute '?' for the rest.
    // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the 
    //       whole string because they require that the entire input be invariant.
    char buf[2];
    for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
        if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
            u_UCharsToChars(in.getBuffer()+i, buf, 1);
        } else {
            buf[0] = '?';
        }
        s.append(buf, 1, status);
    }
#endif
}

CStr::~CStr() {
}

const char * CStr::operator ()() const {
    return s.data();
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author       Helena Shih
*
* Modification History:
*
*   Date        Name        Description
*   6/18/98     hshih       Created
*   09/08/98    stephen     Added include for ctype, for Mac Port
*   11/15/99    helena      Integrated S/390 IEEE changes. 
******************************************************************************
*/



#include <stdlib.h>
#include <stdio.h>





/*
 * We hardcode case conversion for invariant characters to match our expectation
 * and the compiler execution charset.
 * This prevents problems on systems
 * - with non-default casing behavior, like Turkish system locales where
 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
 * - where there are no lowercase Latin characters at all, or using different
 *   codes (some old EBCDIC codepages)
 *
 * This works because the compiler usually runs on a platform where the execution
 * charset includes all of the invariant characters at their expected
 * code positions, so that the char * string literals in ICU code match
 * the char literals here.
 *
 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
 * and the set of uppercase Latin letters is discontiguous as well.
 */

U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    return
        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
#else
    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
#endif
}

U_CAPI char U_EXPORT2
uprv_toupper(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
        c=(char)(c+('A'-'a'));
    }
#else
    if('a'<=c && c<='z') {
        c=(char)(c+('A'-'a'));
    }
#endif
    return c;
}


#if 0
/*
 * Commented out because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif

U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    if(0x41<=c && c<=0x5a) {
        c=(char)(c+0x20);
    }
    return c;
}

U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
    ) {
        c=(char)(c-0x40);
    }
    return c;
}


U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
    char* origPtr = str;

    if (str) {
        do
            *str = (char)uprv_tolower(*str);
        while (*(str++));
    }

    return origPtr;
}

U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
    char* origPtr = str;

    if (str) {
        do
            *str = (char)uprv_toupper(*str);
        while (*(str++));
    }

    return origPtr;
}

/*
 * Takes a int32_t and fills in  a char* string with that number "radix"-based.
 * Does not handle negative values (makes an empty string for them).
 * Writes at most 12 chars ("-2147483647" plus NUL).
 * Returns the length of the string (not including the NUL).
 */
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
    char      tbuf[30];
    int32_t   tbx    = sizeof(tbuf);
    uint8_t   digit;
    int32_t   length = 0;
    uint32_t  uval;
    
    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint32_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we conside numbers to be signed. */
        uval = (uint32_t)(-v); 
        buffer[length++] = '-';
    }
    
    tbx = sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);
    
    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += sizeof(tbuf) - tbx -1;
    return length;
}



/*
 * Takes a int64_t and fills in  a char* string with that number "radix"-based.
 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
 * Returns the length of the string, not including the terminating NULL.
 */
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
    char      tbuf[30];
    int32_t   tbx    = sizeof(tbuf);
    uint8_t   digit;
    int32_t   length = 0;
    uint64_t  uval;
    
    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint64_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we conside numbers to be signed. */
        uval = (uint64_t)(-v); 
        buffer[length++] = '-';
    }
    
    tbx = sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);
    
    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += sizeof(tbuf) - tbx -1;
    return length;
}


U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *end;
    return uprv_strtoul(integerString, &end, radix);

}

U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2) {
    if(str1==NULL) {
        if(str2==NULL) {
            return 0;
        } else {
            return -1;
        }
    } else if(str2==NULL) {
        return 1;
    } else {
        /* compare non-NULL strings lexically with lowercase */
        int rc;
        unsigned char c1, c2;

        for(;;) {
            c1=(unsigned char)*str1;
            c2=(unsigned char)*str2;
            if(c1==0) {
                if(c2==0) {
                    return 0;
                } else {
                    return -1;
                }
            } else if(c2==0) {
                return 1;
            } else {
                /* compare non-zero characters with lowercase */
                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
                if(rc!=0) {
                    return rc;
                }
            }
            ++str1;
            ++str2;
        }
    }
}

U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
    if(str1==NULL) {
        if(str2==NULL) {
            return 0;
        } else {
            return -1;
        }
    } else if(str2==NULL) {
        return 1;
    } else {
        /* compare non-NULL strings lexically with lowercase */
        int rc;
        unsigned char c1, c2;

        for(; n--;) {
            c1=(unsigned char)*str1;
            c2=(unsigned char)*str2;
            if(c1==0) {
                if(c2==0) {
                    return 0;
                } else {
                    return -1;
                }
            } else if(c2==0) {
                return 1;
            } else {
                /* compare non-zero characters with lowercase */
                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
                if(rc!=0) {
                    return rc;
                }
            }
            ++str1;
            ++str2;
        }
    }

    return 0;
}

U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
    size_t len = uprv_strlen(src) + 1;
    char *dup = (char *) uprv_malloc(len);

    if (dup) {
        uprv_memcpy(dup, src, len);
    }

    return dup;
}

U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
    char *dup;

    if(n < 0) {
        dup = uprv_strdup(src);
    } else {
        dup = (char*)uprv_malloc(n+1);
        if (dup) { 
            uprv_memcpy(dup, src, n);
            dup[n] = 0;
        }
    }

    return dup;
}
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*  
******************************************************************************
*
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  cwchar.c
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001may25
*   created by: Markus W. Scherer
*/



#if !U_HAVE_WCSCPY

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*  
******************************************************************************
*
*   Copyright (C) 2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  cwchar.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001may25
*   created by: Markus W. Scherer
*
*   This file contains ICU-internal definitions of wchar_t operations.
*   These definitions were moved here from cstring.h so that fewer
*   ICU implementation files include wchar.h.
*/


#include <string.h>
#include <stdlib.h>


/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
#if U_HAVE_WCHAR_H
#   include <wchar.h>
#endif

/*===========================================================================*/
/* Wide-character functions                                                  */
/*===========================================================================*/

/* The following are not available on all systems, defined in wchar.h or string.h. */
#if U_HAVE_WCSCPY
#   define uprv_wcscpy wcscpy
#   define uprv_wcscat wcscat
#   define uprv_wcslen wcslen
#else
U_CAPI wchar_t* U_EXPORT2 
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
U_CAPI wchar_t* U_EXPORT2 
uprv_wcscat(wchar_t *dst, const wchar_t *src);
U_CAPI size_t U_EXPORT2 
uprv_wcslen(const wchar_t *src);
#endif

/* The following are part of the ANSI C standard, defined in stdlib.h . */
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)




U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
    wchar_t *start=dst;
    while(*dst!=0) {
        ++dst;
    }
    while((*dst=*src)!=0) {
        ++dst;
        ++src;
    }
    return start;
}

U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
    wchar_t *start=dst;
    while((*dst=*src)!=0) {
        ++dst;
        ++src;
    }
    return start;
}

U_CAPI size_t uprv_wcslen(const wchar_t *src) {
    const wchar_t *start=src;
    while(*src!=0) {
        ++src;
    }
    return src-start;
}

#endif

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
 *******************************************************************************
 * Copyright (C) 2006-2016, International Business Machines Corporation
 * and others. All Rights Reserved.
 *******************************************************************************
 */

#include <utility>



// #if !UCONFIG_NO_BREAK_ITERATION

// #include "brkeng.h"
// #include "dictbe.h"
// #include "unicode/uniset.h"
// #include "unicode/chariter.h"
// #include "unicode/ubrk.h"
// #include "uvectr32.h"
// #include "uvector.h"
// #include "uassert.h"
// #include "unicode/normlzr.h"
// #include "cmemory.h"
// #include "dictionarydata.h"

// U_NAMESPACE_BEGIN

// /*
//  ******************************************************************
//  */

// DictionaryBreakEngine::DictionaryBreakEngine() {
// }

// DictionaryBreakEngine::~DictionaryBreakEngine() {
// }

// UBool
// DictionaryBreakEngine::handles(UChar32 c) const {
//     return fSet.contains(c);
// }

// int32_t
// DictionaryBreakEngine::findBreaks( UText *text,
//                                  int32_t startPos,
//                                  int32_t endPos,
//                                  UVector32 &foundBreaks ) const {
//     (void)startPos;            // TODO: remove this param?
//     int32_t result = 0;

//     // Find the span of characters included in the set.
//     //   The span to break begins at the current position in the text, and
//     //   extends towards the start or end of the text, depending on 'reverse'.

//     int32_t start = (int32_t)utext_getNativeIndex(text);
//     int32_t current;
//     int32_t rangeStart;
//     int32_t rangeEnd;
//     UChar32 c = utext_current32(text);
//     while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
//         utext_next32(text);         // TODO:  recast loop for postincrement
//         c = utext_current32(text);
//     }
//     rangeStart = start;
//     rangeEnd = current;
//     result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
//     utext_setNativeIndex(text, current);

//     return result;
// }

// void
// DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
//     fSet = set;
//     // Compact for caching
//     fSet.compact();
// }

// /*
//  ******************************************************************
//  * PossibleWord
//  */

// // Helper class for improving readability of the Thai/Lao/Khmer word break
// // algorithm. The implementation is completely inline.

// // List size, limited by the maximum number of words in the dictionary
// // that form a nested sequence.
// static const int32_t POSSIBLE_WORD_LIST_MAX = 20;

// class PossibleWord {
// private:
//     // list of word candidate lengths, in increasing length order
//     // TODO: bytes would be sufficient for word lengths.
//     int32_t   count;      // Count of candidates
//     int32_t   prefix;     // The longest match with a dictionary word
//     int32_t   offset;     // Offset in the text of these candidates
//     int32_t   mark;       // The preferred candidate's offset
//     int32_t   current;    // The candidate we're currently looking at
//     int32_t   cuLengths[POSSIBLE_WORD_LIST_MAX];   // Word Lengths, in code units.
//     int32_t   cpLengths[POSSIBLE_WORD_LIST_MAX];   // Word Lengths, in code points.

// public:
//     PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {}
//     ~PossibleWord() {}

//     // Fill the list of candidates if needed, select the longest, and return the number found
//     int32_t   candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );

//     // Select the currently marked candidate, point after it in the text, and invalidate self
//     int32_t   acceptMarked( UText *text );

//     // Back up from the current candidate to the next shorter one; return TRUE if that exists
//     // and point the text after it
//     UBool     backUp( UText *text );

//     // Return the longest prefix this candidate location shares with a dictionary word
//     // Return value is in code points.
//     int32_t   longestPrefix() { return prefix; }

//     // Mark the current candidate as the one we like
//     void      markCurrent() { mark = current; }

//     // Get length in code points of the marked word.
//     int32_t   markedCPLength() { return cpLengths[mark]; }
// };


// int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
//     // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
//     int32_t start = (int32_t)utext_getNativeIndex(text);
//     if (start != offset) {
//         offset = start;
//         count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
//         // Dictionary leaves text after longest prefix, not longest word. Back up.
//         if (count <= 0) {
//             utext_setNativeIndex(text, start);
//         }
//     }
//     if (count > 0) {
//         utext_setNativeIndex(text, start+cuLengths[count-1]);
//     }
//     current = count-1;
//     mark = current;
//     return count;
// }

// int32_t
// PossibleWord::acceptMarked( UText *text ) {
//     utext_setNativeIndex(text, offset + cuLengths[mark]);
//     return cuLengths[mark];
// }


// UBool
// PossibleWord::backUp( UText *text ) {
//     if (current > 0) {
//         utext_setNativeIndex(text, offset + cuLengths[--current]);
//         return TRUE;
//     }
//     return FALSE;
// }

// /*
//  ******************************************************************
//  * ThaiBreakEngine
//  */

// // How many words in a row are "good enough"?
// static const int32_t THAI_LOOKAHEAD = 3;

// // Will not combine a non-word with a preceding dictionary word longer than this
// static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;

// // Will not combine a non-word that shares at least this much prefix with a
// // dictionary word, with a preceding word
// static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;

// // Ellision character
// static const int32_t THAI_PAIYANNOI = 0x0E2F;

// // Repeat character
// static const int32_t THAI_MAIYAMOK = 0x0E46;

// // Minimum word size
// static const int32_t THAI_MIN_WORD = 2;

// // Minimum number of characters for two words
// static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;

// ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
//     : DictionaryBreakEngine(),
//       fDictionary(adoptDictionary)
// {
//     fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
//     if (U_SUCCESS(status)) {
//         setCharacters(fThaiWordSet);
//     }
//     fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
//     fMarkSet.add(0x0020);
//     fEndWordSet = fThaiWordSet;
//     fEndWordSet.remove(0x0E31);             // MAI HAN-AKAT
//     fEndWordSet.remove(0x0E40, 0x0E44);     // SARA E through SARA AI MAIMALAI
//     fBeginWordSet.add(0x0E01, 0x0E2E);      // KO KAI through HO NOKHUK
//     fBeginWordSet.add(0x0E40, 0x0E44);      // SARA E through SARA AI MAIMALAI
//     fSuffixSet.add(THAI_PAIYANNOI);
//     fSuffixSet.add(THAI_MAIYAMOK);

//     // Compact for caching.
//     fMarkSet.compact();
//     fEndWordSet.compact();
//     fBeginWordSet.compact();
//     fSuffixSet.compact();
// }

// ThaiBreakEngine::~ThaiBreakEngine() {
//     delete fDictionary;
// }

// int32_t
// ThaiBreakEngine::divideUpDictionaryRange( UText *text,
//                                                 int32_t rangeStart,
//                                                 int32_t rangeEnd,
//                                                 UVector32 &foundBreaks ) const {
//     utext_setNativeIndex(text, rangeStart);
//     utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
//     if (utext_getNativeIndex(text) >= rangeEnd) {
//         return 0;       // Not enough characters for two words
//     }
//     utext_setNativeIndex(text, rangeStart);


//     uint32_t wordsFound = 0;
//     int32_t cpWordLength = 0;    // Word Length in Code Points.
//     int32_t cuWordLength = 0;    // Word length in code units (UText native indexing)
//     int32_t current;
//     UErrorCode status = U_ZERO_ERROR;
//     PossibleWord words[THAI_LOOKAHEAD];

//     utext_setNativeIndex(text, rangeStart);

//     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
//         cpWordLength = 0;
//         cuWordLength = 0;

//         // Look for candidate words at the current position
//         int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);

//         // If we found exactly one, use that
//         if (candidates == 1) {
//             cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }
//         // If there was more than one, see which one can take us forward the most words
//         else if (candidates > 1) {
//             // If we're already at the end of the range, we're done
//             if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                 goto foundBest;
//             }
//             do {
//                 int32_t wordsMatched = 1;
//                 if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
//                     if (wordsMatched < 2) {
//                         // Followed by another dictionary word; mark first word as a good candidate
//                         words[wordsFound%THAI_LOOKAHEAD].markCurrent();
//                         wordsMatched = 2;
//                     }

//                     // If we're already at the end of the range, we're done
//                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                         goto foundBest;
//                     }

//                     // See if any of the possible second words is followed by a third word
//                     do {
//                         // If we find a third word, stop right away
//                         if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
//                             words[wordsFound % THAI_LOOKAHEAD].markCurrent();
//                             goto foundBest;
//                         }
//                     }
//                     while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text));
//                 }
//             }
//             while (words[wordsFound % THAI_LOOKAHEAD].backUp(text));
// foundBest:
//             // Set UText position to after the accepted word.
//             cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }

//         // We come here after having either found a word or not. We look ahead to the
//         // next word. If it's not a dictionary word, we will combine it with the word we
//         // just found (if there is one), but only if the preceding word does not exceed
//         // the threshold.
//         // The text iterator should now be positioned at the end of the word we found.

//         UChar32 uc = 0;
//         if ((int32_t)utext_getNativeIndex(text) < rangeEnd &&  cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
//             // if it is a dictionary word, do nothing. If it isn't, then if there is
//             // no preceding word, or the non-word shares less than the minimum threshold
//             // of characters with a dictionary word, then scan to resynchronize
//             if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
//                   && (cuWordLength == 0
//                       || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
//                 // Look for a plausible word boundary
//                 int32_t remaining = rangeEnd - (current+cuWordLength);
//                 UChar32 pc;
//                 int32_t chars = 0;
//                 for (;;) {
//                     int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
//                     pc = utext_next32(text);
//                     int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
//                     chars += pcSize;
//                     remaining -= pcSize;
//                     if (remaining <= 0) {
//                         break;
//                     }
//                     uc = utext_current32(text);
//                     if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
//                         // Maybe. See if it's in the dictionary.
//                         // NOTE: In the original Apple code, checked that the next
//                         // two characters after uc were not 0x0E4C THANTHAKHAT before
//                         // checking the dictionary. That is just a performance filter,
//                         // but it's not clear it's faster than checking the trie.
//                         int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
//                         utext_setNativeIndex(text, current + cuWordLength + chars);
//                         if (num_candidates > 0) {
//                             break;
//                         }
//                     }
//                 }

//                 // Bump the word count if there wasn't already one
//                 if (cuWordLength <= 0) {
//                     wordsFound += 1;
//                 }

//                 // Update the length with the passed-over characters
//                 cuWordLength += chars;
//             }
//             else {
//                 // Back up to where we were for next iteration
//                 utext_setNativeIndex(text, current+cuWordLength);
//             }
//         }

//         // Never stop before a combining mark.
//         int32_t currPos;
//         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
//             utext_next32(text);
//             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
//         }

//         // Look ahead for possible suffixes if a dictionary word does not follow.
//         // We do this in code rather than using a rule so that the heuristic
//         // resynch continues to function. For example, one of the suffix characters
//         // could be a typo in the middle of a word.
//         if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) {
//             if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
//                 && fSuffixSet.contains(uc = utext_current32(text))) {
//                 if (uc == THAI_PAIYANNOI) {
//                     if (!fSuffixSet.contains(utext_previous32(text))) {
//                         // Skip over previous end and PAIYANNOI
//                         utext_next32(text);
//                         int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text);
//                         utext_next32(text);
//                         cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex;    // Add PAIYANNOI to word
//                         uc = utext_current32(text);     // Fetch next character
//                     }
//                     else {
//                         // Restore prior position
//                         utext_next32(text);
//                     }
//                 }
//                 if (uc == THAI_MAIYAMOK) {
//                     if (utext_previous32(text) != THAI_MAIYAMOK) {
//                         // Skip over previous end and MAIYAMOK
//                         utext_next32(text);
//                         int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text);
//                         utext_next32(text);
//                         cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex;    // Add MAIYAMOK to word
//                     }
//                     else {
//                         // Restore prior position
//                         utext_next32(text);
//                     }
//                 }
//             }
//             else {
//                 utext_setNativeIndex(text, current+cuWordLength);
//             }
//         }

//         // Did we find a word on this iteration? If so, push it on the break stack
//         if (cuWordLength > 0) {
//             foundBreaks.push((current+cuWordLength), status);
//         }
//     }

//     // Don't return a break for the end of the dictionary range if there is one there.
//     if (foundBreaks.peeki() >= rangeEnd) {
//         (void) foundBreaks.popi();
//         wordsFound -= 1;
//     }

//     return wordsFound;
// }

// /*
//  ******************************************************************
//  * LaoBreakEngine
//  */

// // How many words in a row are "good enough"?
// static const int32_t LAO_LOOKAHEAD = 3;

// // Will not combine a non-word with a preceding dictionary word longer than this
// static const int32_t LAO_ROOT_COMBINE_THRESHOLD = 3;

// // Will not combine a non-word that shares at least this much prefix with a
// // dictionary word, with a preceding word
// static const int32_t LAO_PREFIX_COMBINE_THRESHOLD = 3;

// // Minimum word size
// static const int32_t LAO_MIN_WORD = 2;

// // Minimum number of characters for two words
// static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;

// LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
//     : DictionaryBreakEngine(),
//       fDictionary(adoptDictionary)
// {
//     fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
//     if (U_SUCCESS(status)) {
//         setCharacters(fLaoWordSet);
//     }
//     fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
//     fMarkSet.add(0x0020);
//     fEndWordSet = fLaoWordSet;
//     fEndWordSet.remove(0x0EC0, 0x0EC4);     // prefix vowels
//     fBeginWordSet.add(0x0E81, 0x0EAE);      // basic consonants (including holes for corresponding Thai characters)
//     fBeginWordSet.add(0x0EDC, 0x0EDD);      // digraph consonants (no Thai equivalent)
//     fBeginWordSet.add(0x0EC0, 0x0EC4);      // prefix vowels

//     // Compact for caching.
//     fMarkSet.compact();
//     fEndWordSet.compact();
//     fBeginWordSet.compact();
// }

// LaoBreakEngine::~LaoBreakEngine() {
//     delete fDictionary;
// }

// int32_t
// LaoBreakEngine::divideUpDictionaryRange( UText *text,
//                                                 int32_t rangeStart,
//                                                 int32_t rangeEnd,
//                                                 UVector32 &foundBreaks ) const {
//     if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
//         return 0;       // Not enough characters for two words
//     }

//     uint32_t wordsFound = 0;
//     int32_t cpWordLength = 0;
//     int32_t cuWordLength = 0;
//     int32_t current;
//     UErrorCode status = U_ZERO_ERROR;
//     PossibleWord words[LAO_LOOKAHEAD];

//     utext_setNativeIndex(text, rangeStart);

//     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
//         cuWordLength = 0;
//         cpWordLength = 0;

//         // Look for candidate words at the current position
//         int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);

//         // If we found exactly one, use that
//         if (candidates == 1) {
//             cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }
//         // If there was more than one, see which one can take us forward the most words
//         else if (candidates > 1) {
//             // If we're already at the end of the range, we're done
//             if (utext_getNativeIndex(text) >= rangeEnd) {
//                 goto foundBest;
//             }
//             do {
//                 int32_t wordsMatched = 1;
//                 if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
//                     if (wordsMatched < 2) {
//                         // Followed by another dictionary word; mark first word as a good candidate
//                         words[wordsFound%LAO_LOOKAHEAD].markCurrent();
//                         wordsMatched = 2;
//                     }

//                     // If we're already at the end of the range, we're done
//                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                         goto foundBest;
//                     }

//                     // See if any of the possible second words is followed by a third word
//                     do {
//                         // If we find a third word, stop right away
//                         if (words[(wordsFound + 2) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
//                             words[wordsFound % LAO_LOOKAHEAD].markCurrent();
//                             goto foundBest;
//                         }
//                     }
//                     while (words[(wordsFound + 1) % LAO_LOOKAHEAD].backUp(text));
//                 }
//             }
//             while (words[wordsFound % LAO_LOOKAHEAD].backUp(text));
// foundBest:
//             cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }

//         // We come here after having either found a word or not. We look ahead to the
//         // next word. If it's not a dictionary word, we will combine it withe the word we
//         // just found (if there is one), but only if the preceding word does not exceed
//         // the threshold.
//         // The text iterator should now be positioned at the end of the word we found.
//         if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
//             // if it is a dictionary word, do nothing. If it isn't, then if there is
//             // no preceding word, or the non-word shares less than the minimum threshold
//             // of characters with a dictionary word, then scan to resynchronize
//             if (words[wordsFound % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
//                   && (cuWordLength == 0
//                       || words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) {
//                 // Look for a plausible word boundary
//                 int32_t remaining = rangeEnd - (current + cuWordLength);
//                 UChar32 pc;
//                 UChar32 uc;
//                 int32_t chars = 0;
//                 for (;;) {
//                     int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
//                     pc = utext_next32(text);
//                     int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
//                     chars += pcSize;
//                     remaining -= pcSize;
//                     if (remaining <= 0) {
//                         break;
//                     }
//                     uc = utext_current32(text);
//                     if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
//                         // Maybe. See if it's in the dictionary.
//                         // TODO: this looks iffy; compare with old code.
//                         int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
//                         utext_setNativeIndex(text, current + cuWordLength + chars);
//                         if (num_candidates > 0) {
//                             break;
//                         }
//                     }
//                 }

//                 // Bump the word count if there wasn't already one
//                 if (cuWordLength <= 0) {
//                     wordsFound += 1;
//                 }

//                 // Update the length with the passed-over characters
//                 cuWordLength += chars;
//             }
//             else {
//                 // Back up to where we were for next iteration
//                 utext_setNativeIndex(text, current + cuWordLength);
//             }
//         }

//         // Never stop before a combining mark.
//         int32_t currPos;
//         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
//             utext_next32(text);
//             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
//         }

//         // Look ahead for possible suffixes if a dictionary word does not follow.
//         // We do this in code rather than using a rule so that the heuristic
//         // resynch continues to function. For example, one of the suffix characters
//         // could be a typo in the middle of a word.
//         // NOT CURRENTLY APPLICABLE TO LAO

//         // Did we find a word on this iteration? If so, push it on the break stack
//         if (cuWordLength > 0) {
//             foundBreaks.push((current+cuWordLength), status);
//         }
//     }

//     // Don't return a break for the end of the dictionary range if there is one there.
//     if (foundBreaks.peeki() >= rangeEnd) {
//         (void) foundBreaks.popi();
//         wordsFound -= 1;
//     }

//     return wordsFound;
// }

// /*
//  ******************************************************************
//  * BurmeseBreakEngine
//  */

// // How many words in a row are "good enough"?
// static const int32_t BURMESE_LOOKAHEAD = 3;

// // Will not combine a non-word with a preceding dictionary word longer than this
// static const int32_t BURMESE_ROOT_COMBINE_THRESHOLD = 3;

// // Will not combine a non-word that shares at least this much prefix with a
// // dictionary word, with a preceding word
// static const int32_t BURMESE_PREFIX_COMBINE_THRESHOLD = 3;

// // Minimum word size
// static const int32_t BURMESE_MIN_WORD = 2;

// // Minimum number of characters for two words
// static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;

// BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
//     : DictionaryBreakEngine(),
//       fDictionary(adoptDictionary)
// {
//     fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
//     if (U_SUCCESS(status)) {
//         setCharacters(fBurmeseWordSet);
//     }
//     fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
//     fMarkSet.add(0x0020);
//     fEndWordSet = fBurmeseWordSet;
//     fBeginWordSet.add(0x1000, 0x102A);      // basic consonants and independent vowels

//     // Compact for caching.
//     fMarkSet.compact();
//     fEndWordSet.compact();
//     fBeginWordSet.compact();
// }

// BurmeseBreakEngine::~BurmeseBreakEngine() {
//     delete fDictionary;
// }

// int32_t
// BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
//                                                 int32_t rangeStart,
//                                                 int32_t rangeEnd,
//                                                 UVector32 &foundBreaks ) const {
//     if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
//         return 0;       // Not enough characters for two words
//     }

//     uint32_t wordsFound = 0;
//     int32_t cpWordLength = 0;
//     int32_t cuWordLength = 0;
//     int32_t current;
//     UErrorCode status = U_ZERO_ERROR;
//     PossibleWord words[BURMESE_LOOKAHEAD];

//     utext_setNativeIndex(text, rangeStart);

//     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
//         cuWordLength = 0;
//         cpWordLength = 0;

//         // Look for candidate words at the current position
//         int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);

//         // If we found exactly one, use that
//         if (candidates == 1) {
//             cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }
//         // If there was more than one, see which one can take us forward the most words
//         else if (candidates > 1) {
//             // If we're already at the end of the range, we're done
//             if (utext_getNativeIndex(text) >= rangeEnd) {
//                 goto foundBest;
//             }
//             do {
//                 int32_t wordsMatched = 1;
//                 if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
//                     if (wordsMatched < 2) {
//                         // Followed by another dictionary word; mark first word as a good candidate
//                         words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
//                         wordsMatched = 2;
//                     }

//                     // If we're already at the end of the range, we're done
//                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                         goto foundBest;
//                     }

//                     // See if any of the possible second words is followed by a third word
//                     do {
//                         // If we find a third word, stop right away
//                         if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
//                             words[wordsFound % BURMESE_LOOKAHEAD].markCurrent();
//                             goto foundBest;
//                         }
//                     }
//                     while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].backUp(text));
//                 }
//             }
//             while (words[wordsFound % BURMESE_LOOKAHEAD].backUp(text));
// foundBest:
//             cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }

//         // We come here after having either found a word or not. We look ahead to the
//         // next word. If it's not a dictionary word, we will combine it withe the word we
//         // just found (if there is one), but only if the preceding word does not exceed
//         // the threshold.
//         // The text iterator should now be positioned at the end of the word we found.
//         if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
//             // if it is a dictionary word, do nothing. If it isn't, then if there is
//             // no preceding word, or the non-word shares less than the minimum threshold
//             // of characters with a dictionary word, then scan to resynchronize
//             if (words[wordsFound % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
//                   && (cuWordLength == 0
//                       || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) {
//                 // Look for a plausible word boundary
//                 int32_t remaining = rangeEnd - (current + cuWordLength);
//                 UChar32 pc;
//                 UChar32 uc;
//                 int32_t chars = 0;
//                 for (;;) {
//                     int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
//                     pc = utext_next32(text);
//                     int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
//                     chars += pcSize;
//                     remaining -= pcSize;
//                     if (remaining <= 0) {
//                         break;
//                     }
//                     uc = utext_current32(text);
//                     if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
//                         // Maybe. See if it's in the dictionary.
//                         // TODO: this looks iffy; compare with old code.
//                         int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
//                         utext_setNativeIndex(text, current + cuWordLength + chars);
//                         if (num_candidates > 0) {
//                             break;
//                         }
//                     }
//                 }

//                 // Bump the word count if there wasn't already one
//                 if (cuWordLength <= 0) {
//                     wordsFound += 1;
//                 }

//                 // Update the length with the passed-over characters
//                 cuWordLength += chars;
//             }
//             else {
//                 // Back up to where we were for next iteration
//                 utext_setNativeIndex(text, current + cuWordLength);
//             }
//         }

//         // Never stop before a combining mark.
//         int32_t currPos;
//         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
//             utext_next32(text);
//             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
//         }

//         // Look ahead for possible suffixes if a dictionary word does not follow.
//         // We do this in code rather than using a rule so that the heuristic
//         // resynch continues to function. For example, one of the suffix characters
//         // could be a typo in the middle of a word.
//         // NOT CURRENTLY APPLICABLE TO BURMESE

//         // Did we find a word on this iteration? If so, push it on the break stack
//         if (cuWordLength > 0) {
//             foundBreaks.push((current+cuWordLength), status);
//         }
//     }

//     // Don't return a break for the end of the dictionary range if there is one there.
//     if (foundBreaks.peeki() >= rangeEnd) {
//         (void) foundBreaks.popi();
//         wordsFound -= 1;
//     }

//     return wordsFound;
// }

// /*
//  ******************************************************************
//  * KhmerBreakEngine
//  */

// // How many words in a row are "good enough"?
// static const int32_t KHMER_LOOKAHEAD = 3;

// // Will not combine a non-word with a preceding dictionary word longer than this
// static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;

// // Will not combine a non-word that shares at least this much prefix with a
// // dictionary word, with a preceding word
// static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;

// // Minimum word size
// static const int32_t KHMER_MIN_WORD = 2;

// // Minimum number of characters for two words
// static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;

// KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
//     : DictionaryBreakEngine(),
//       fDictionary(adoptDictionary)
// {
//     fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
//     if (U_SUCCESS(status)) {
//         setCharacters(fKhmerWordSet);
//     }
//     fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
//     fMarkSet.add(0x0020);
//     fEndWordSet = fKhmerWordSet;
//     fBeginWordSet.add(0x1780, 0x17B3);
//     //fBeginWordSet.add(0x17A3, 0x17A4);      // deprecated vowels
//     //fEndWordSet.remove(0x17A5, 0x17A9);     // Khmer independent vowels that can't end a word
//     //fEndWordSet.remove(0x17B2);             // Khmer independent vowel that can't end a word
//     fEndWordSet.remove(0x17D2);             // KHMER SIGN COENG that combines some following characters
//     //fEndWordSet.remove(0x17B6, 0x17C5);     // Remove dependent vowels
// //    fEndWordSet.remove(0x0E31);             // MAI HAN-AKAT
// //    fEndWordSet.remove(0x0E40, 0x0E44);     // SARA E through SARA AI MAIMALAI
// //    fBeginWordSet.add(0x0E01, 0x0E2E);      // KO KAI through HO NOKHUK
// //    fBeginWordSet.add(0x0E40, 0x0E44);      // SARA E through SARA AI MAIMALAI
// //    fSuffixSet.add(THAI_PAIYANNOI);
// //    fSuffixSet.add(THAI_MAIYAMOK);

//     // Compact for caching.
//     fMarkSet.compact();
//     fEndWordSet.compact();
//     fBeginWordSet.compact();
// //    fSuffixSet.compact();
// }

// KhmerBreakEngine::~KhmerBreakEngine() {
//     delete fDictionary;
// }

// int32_t
// KhmerBreakEngine::divideUpDictionaryRange( UText *text,
//                                                 int32_t rangeStart,
//                                                 int32_t rangeEnd,
//                                                 UVector32 &foundBreaks ) const {
//     if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
//         return 0;       // Not enough characters for two words
//     }

//     uint32_t wordsFound = 0;
//     int32_t cpWordLength = 0;
//     int32_t cuWordLength = 0;
//     int32_t current;
//     UErrorCode status = U_ZERO_ERROR;
//     PossibleWord words[KHMER_LOOKAHEAD];

//     utext_setNativeIndex(text, rangeStart);

//     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
//         cuWordLength = 0;
//         cpWordLength = 0;

//         // Look for candidate words at the current position
//         int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);

//         // If we found exactly one, use that
//         if (candidates == 1) {
//             cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }

//         // If there was more than one, see which one can take us forward the most words
//         else if (candidates > 1) {
//             // If we're already at the end of the range, we're done
//             if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                 goto foundBest;
//             }
//             do {
//                 int32_t wordsMatched = 1;
//                 if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
//                     if (wordsMatched < 2) {
//                         // Followed by another dictionary word; mark first word as a good candidate
//                         words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
//                         wordsMatched = 2;
//                     }

//                     // If we're already at the end of the range, we're done
//                     if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
//                         goto foundBest;
//                     }

//                     // See if any of the possible second words is followed by a third word
//                     do {
//                         // If we find a third word, stop right away
//                         if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
//                             words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
//                             goto foundBest;
//                         }
//                     }
//                     while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
//                 }
//             }
//             while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
// foundBest:
//             cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
//             cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
//             wordsFound += 1;
//         }

//         // We come here after having either found a word or not. We look ahead to the
//         // next word. If it's not a dictionary word, we will combine it with the word we
//         // just found (if there is one), but only if the preceding word does not exceed
//         // the threshold.
//         // The text iterator should now be positioned at the end of the word we found.
//         if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
//             // if it is a dictionary word, do nothing. If it isn't, then if there is
//             // no preceding word, or the non-word shares less than the minimum threshold
//             // of characters with a dictionary word, then scan to resynchronize
//             if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
//                   && (cuWordLength == 0
//                       || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
//                 // Look for a plausible word boundary
//                 int32_t remaining = rangeEnd - (current+cuWordLength);
//                 UChar32 pc;
//                 UChar32 uc;
//                 int32_t chars = 0;
//                 for (;;) {
//                     int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
//                     pc = utext_next32(text);
//                     int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
//                     chars += pcSize;
//                     remaining -= pcSize;
//                     if (remaining <= 0) {
//                         break;
//                     }
//                     uc = utext_current32(text);
//                     if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
//                         // Maybe. See if it's in the dictionary.
//                         int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
//                         utext_setNativeIndex(text, current+cuWordLength+chars);
//                         if (num_candidates > 0) {
//                             break;
//                         }
//                     }
//                 }

//                 // Bump the word count if there wasn't already one
//                 if (cuWordLength <= 0) {
//                     wordsFound += 1;
//                 }

//                 // Update the length with the passed-over characters
//                 cuWordLength += chars;
//             }
//             else {
//                 // Back up to where we were for next iteration
//                 utext_setNativeIndex(text, current+cuWordLength);
//             }
//         }

//         // Never stop before a combining mark.
//         int32_t currPos;
//         while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
//             utext_next32(text);
//             cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
//         }

//         // Look ahead for possible suffixes if a dictionary word does not follow.
//         // We do this in code rather than using a rule so that the heuristic
//         // resynch continues to function. For example, one of the suffix characters
//         // could be a typo in the middle of a word.
// //        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
// //            if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
// //                && fSuffixSet.contains(uc = utext_current32(text))) {
// //                if (uc == KHMER_PAIYANNOI) {
// //                    if (!fSuffixSet.contains(utext_previous32(text))) {
// //                        // Skip over previous end and PAIYANNOI
// //                        utext_next32(text);
// //                        utext_next32(text);
// //                        wordLength += 1;            // Add PAIYANNOI to word
// //                        uc = utext_current32(text);     // Fetch next character
// //                    }
// //                    else {
// //                        // Restore prior position
// //                        utext_next32(text);
// //                    }
// //                }
// //                if (uc == KHMER_MAIYAMOK) {
// //                    if (utext_previous32(text) != KHMER_MAIYAMOK) {
// //                        // Skip over previous end and MAIYAMOK
// //                        utext_next32(text);
// //                        utext_next32(text);
// //                        wordLength += 1;            // Add MAIYAMOK to word
// //                    }
// //                    else {
// //                        // Restore prior position
// //                        utext_next32(text);
// //                    }
// //                }
// //            }
// //            else {
// //                utext_setNativeIndex(text, current+wordLength);
// //            }
// //        }

//         // Did we find a word on this iteration? If so, push it on the break stack
//         if (cuWordLength > 0) {
//             foundBreaks.push((current+cuWordLength), status);
//         }
//     }

//     // Don't return a break for the end of the dictionary range if there is one there.
//     if (foundBreaks.peeki() >= rangeEnd) {
//         (void) foundBreaks.popi();
//         wordsFound -= 1;
//     }

//     return wordsFound;
// }

// #if !UCONFIG_NO_NORMALIZATION
// /*
//  ******************************************************************
//  * CjkBreakEngine
//  */
// static const uint32_t kuint32max = 0xFFFFFFFF;
// CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
// : DictionaryBreakEngine(), fDictionary(adoptDictionary) {
//     // Korean dictionary only includes Hangul syllables
//     fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
//     fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
//     fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
//     fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
//     nfkcNorm2 = Normalizer2::getNFKCInstance(status);

//     if (U_SUCCESS(status)) {
//         // handle Korean and Japanese/Chinese using different dictionaries
//         if (type == kKorean) {
//             setCharacters(fHangulWordSet);
//         } else { //Chinese and Japanese
//             UnicodeSet cjSet;
//             cjSet.addAll(fHanWordSet);
//             cjSet.addAll(fKatakanaWordSet);
//             cjSet.addAll(fHiraganaWordSet);
//             cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
//             cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
//             setCharacters(cjSet);
//         }
//     }
// }

// CjkBreakEngine::~CjkBreakEngine(){
//     delete fDictionary;
// }

// // The katakanaCost values below are based on the length frequencies of all
// // katakana phrases in the dictionary
// static const int32_t kMaxKatakanaLength = 8;
// static const int32_t kMaxKatakanaGroupLength = 20;
// static const uint32_t maxSnlp = 255;

// static inline uint32_t getKatakanaCost(int32_t wordLength){
//     //TODO: fill array with actual values from dictionary!
//     static const uint32_t katakanaCost[kMaxKatakanaLength + 1]
//                                        = {8192, 984, 408, 240, 204, 252, 300, 372, 480};
//     return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength];
// }

// static inline bool isKatakana(UChar32 value) {
//     return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
//             (value >= 0xFF66 && value <= 0xFF9f);
// }


// // Function for accessing internal utext flags.
// //   Replicates an internal UText function.

// static inline int32_t utext_i32_flag(int32_t bitIndex) {
//     return (int32_t)1 << bitIndex;
// }


// /*
//  * @param text A UText representing the text
//  * @param rangeStart The start of the range of dictionary characters
//  * @param rangeEnd The end of the range of dictionary characters
//  * @param foundBreaks vector<int32> to receive the break positions
//  * @return The number of breaks found
//  */
// int32_t
// CjkBreakEngine::divideUpDictionaryRange( UText *inText,
//         int32_t rangeStart,
//         int32_t rangeEnd,
//         UVector32 &foundBreaks ) const {
//     if (rangeStart >= rangeEnd) {
//         return 0;
//     }

//     // UnicodeString version of input UText, NFKC normalized if necessary.
//     UnicodeString inString;

//     // inputMap[inStringIndex] = corresponding native index from UText inText.
//     // If NULL then mapping is 1:1
//     LocalPointer<UVector32>     inputMap;

//     UErrorCode     status      = U_ZERO_ERROR;


//     // if UText has the input string as one contiguous UTF-16 chunk
//     if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
//          inText->chunkNativeStart <= rangeStart &&
//          inText->chunkNativeLimit >= rangeEnd   &&
//          inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) {

//         // Input UText is in one contiguous UTF-16 chunk.
//         // Use Read-only aliasing UnicodeString.
//         inString.setTo(FALSE,
//                        inText->chunkContents + rangeStart - inText->chunkNativeStart,
//                        rangeEnd - rangeStart);
//     } else {
//         // Copy the text from the original inText (UText) to inString (UnicodeString).
//         // Create a map from UnicodeString indices -> UText offsets.
//         utext_setNativeIndex(inText, rangeStart);
//         int32_t limit = rangeEnd;
//         U_ASSERT(limit <= utext_nativeLength(inText));
//         if (limit > utext_nativeLength(inText)) {
//             limit = (int32_t)utext_nativeLength(inText);
//         }
//         inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
//         if (U_FAILURE(status)) {
//             return 0;
//         }
//         while (utext_getNativeIndex(inText) < limit) {
//             int32_t nativePosition = (int32_t)utext_getNativeIndex(inText);
//             UChar32 c = utext_next32(inText);
//             U_ASSERT(c != U_SENTINEL);
//             inString.append(c);
//             while (inputMap->size() < inString.length()) {
//                 inputMap->addElement(nativePosition, status);
//             }
//         }
//         inputMap->addElement(limit, status);
//     }


//     if (!nfkcNorm2->isNormalized(inString, status)) {
//         UnicodeString normalizedInput;
//         //  normalizedMap[normalizedInput position] ==  original UText position.
//         LocalPointer<UVector32> normalizedMap(new UVector32(status), status);
//         if (U_FAILURE(status)) {
//             return 0;
//         }

//         UnicodeString fragment;
//         UnicodeString normalizedFragment;
//         for (int32_t srcI = 0; srcI < inString.length();) {  // Once per normalization chunk
//             fragment.remove();
//             int32_t fragmentStartI = srcI;
//             UChar32 c = inString.char32At(srcI);
//             for (;;) {
//                 fragment.append(c);
//                 srcI = inString.moveIndex32(srcI, 1);
//                 if (srcI == inString.length()) {
//                     break;
//                 }
//                 c = inString.char32At(srcI);
//                 if (nfkcNorm2->hasBoundaryBefore(c)) {
//                     break;
//                 }
//             }
//             nfkcNorm2->normalize(fragment, normalizedFragment, status);
//             normalizedInput.append(normalizedFragment);

//             // Map every position in the normalized chunk to the start of the chunk
//             //   in the original input.
//             int32_t fragmentOriginalStart = inputMap.isValid() ?
//                     inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart;
//             while (normalizedMap->size() < normalizedInput.length()) {
//                 normalizedMap->addElement(fragmentOriginalStart, status);
//                 if (U_FAILURE(status)) {
//                     break;
//                 }
//             }
//         }
//         U_ASSERT(normalizedMap->size() == normalizedInput.length());
//         int32_t nativeEnd = inputMap.isValid() ?
//                 inputMap->elementAti(inString.length()) : inString.length()+rangeStart;
//         normalizedMap->addElement(nativeEnd, status);

//         inputMap = std::move(normalizedMap);
//         inString = std::move(normalizedInput);
//     }

//     int32_t numCodePts = inString.countChar32();
//     if (numCodePts != inString.length()) {
//         // There are supplementary characters in the input.
//         // The dictionary will produce boundary positions in terms of code point indexes,
//         //   not in terms of code unit string indexes.
//         // Use the inputMap mechanism to take care of this in addition to indexing differences
//         //    from normalization and/or UTF-8 input.
//         UBool hadExistingMap = inputMap.isValid();
//         if (!hadExistingMap) {
//             inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
//             if (U_FAILURE(status)) {
//                 return 0;
//             }
//         }
//         int32_t cpIdx = 0;
//         for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) {
//             U_ASSERT(cuIdx >= cpIdx);
//             if (hadExistingMap) {
//                 inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx);
//             } else {
//                 inputMap->addElement(cuIdx+rangeStart, status);
//             }
//             cpIdx++;
//             if (cuIdx == inString.length()) {
//                break;
//             }
//         }
//     }

//     // bestSnlp[i] is the snlp of the best segmentation of the first i
//     // code points in the range to be matched.
//     UVector32 bestSnlp(numCodePts + 1, status);
//     bestSnlp.addElement(0, status);
//     for(int32_t i = 1; i <= numCodePts; i++) {
//         bestSnlp.addElement(kuint32max, status);
//     }


//     // prev[i] is the index of the last CJK code point in the previous word in
//     // the best segmentation of the first i characters.
//     UVector32 prev(numCodePts + 1, status);
//     for(int32_t i = 0; i <= numCodePts; i++){
//         prev.addElement(-1, status);
//     }

//     const int32_t maxWordSize = 20;
//     UVector32 values(numCodePts, status);
//     values.setSize(numCodePts);
//     UVector32 lengths(numCodePts, status);
//     lengths.setSize(numCodePts);

//     UText fu = UTEXT_INITIALIZER;
//     utext_openUnicodeString(&fu, &inString, &status);

//     // Dynamic programming to find the best segmentation.

//     // In outer loop, i  is the code point index,
//     //                ix is the corresponding string (code unit) index.
//     //    They differ when the string contains supplementary characters.
//     int32_t ix = 0;
//     bool is_prev_katakana = false;
//     for (int32_t i = 0;  i < numCodePts;  ++i, ix = inString.moveIndex32(ix, 1)) {
//         if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
//             continue;
//         }

//         int32_t count;
//         utext_setNativeIndex(&fu, ix);
//         count = fDictionary->matches(&fu, maxWordSize, numCodePts,
//                              NULL, lengths.getBuffer(), values.getBuffer(), NULL);
//                              // Note: lengths is filled with code point lengths
//                              //       The NULL parameter is the ignored code unit lengths.

//         // if there are no single character matches found in the dictionary
//         // starting with this character, treat character as a 1-character word
//         // with the highest value possible, i.e. the least likely to occur.
//         // Exclude Korean characters from this treatment, as they should be left
//         // together by default.
//         if ((count == 0 || lengths.elementAti(0) != 1) &&
//                 !fHangulWordSet.contains(inString.char32At(ix))) {
//             values.setElementAt(maxSnlp, count);   // 255
//             lengths.setElementAt(1, count++);
//         }

//         for (int32_t j = 0; j < count; j++) {
//             uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j);
//             int32_t ln_j_i = lengths.elementAti(j) + i;
//             if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) {
//                 bestSnlp.setElementAt(newSnlp, ln_j_i);
//                 prev.setElementAt(i, ln_j_i);
//             }
//         }

//         // In Japanese,
//         // Katakana word in single character is pretty rare. So we apply
//         // the following heuristic to Katakana: any continuous run of Katakana
//         // characters is considered a candidate word with a default cost
//         // specified in the katakanaCost table according to its length.

//         bool is_katakana = isKatakana(inString.char32At(ix));
//         int32_t katakanaRunLength = 1;
//         if (!is_prev_katakana && is_katakana) {
//             int32_t j = inString.moveIndex32(ix, 1);
//             // Find the end of the continuous run of Katakana characters
//             while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength &&
//                     isKatakana(inString.char32At(j))) {
//                 j = inString.moveIndex32(j, 1);
//                 katakanaRunLength++;
//             }
//             if (katakanaRunLength < kMaxKatakanaGroupLength) {
//                 uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
//                 if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
//                     bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
//                     prev.setElementAt(i, i+katakanaRunLength);  // prev[j] = i;
//                 }
//             }
//         }
//         is_prev_katakana = is_katakana;
//     }
//     utext_close(&fu);

//     // Start pushing the optimal offset index into t_boundary (t for tentative).
//     // prev[numCodePts] is guaranteed to be meaningful.
//     // We'll first push in the reverse order, i.e.,
//     // t_boundary[0] = numCodePts, and afterwards do a swap.
//     UVector32 t_boundary(numCodePts+1, status);

//     int32_t numBreaks = 0;
//     // No segmentation found, set boundary to end of range
//     if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
//         t_boundary.addElement(numCodePts, status);
//         numBreaks++;
//     } else {
//         for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
//             t_boundary.addElement(i, status);
//             numBreaks++;
//         }
//         U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
//     }

//     // Add a break for the start of the dictionary range if there is not one
//     // there already.
//     if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) {
//         t_boundary.addElement(0, status);
//         numBreaks++;
//     }

//     // Now that we're done, convert positions in t_boundary[] (indices in
//     // the normalized input string) back to indices in the original input UText
//     // while reversing t_boundary and pushing values to foundBreaks.
//     int32_t prevCPPos = -1;
//     int32_t prevUTextPos = -1;
//     for (int32_t i = numBreaks-1; i >= 0; i--) {
//         int32_t cpPos = t_boundary.elementAti(i);
//         U_ASSERT(cpPos > prevCPPos);
//         int32_t utextPos =  inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
//         U_ASSERT(utextPos >= prevUTextPos);
//         if (utextPos > prevUTextPos) {
//             // Boundaries are added to foundBreaks output in ascending order.
//             U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
//             foundBreaks.push(utextPos, status);
//         } else {
//             // Normalization expanded the input text, the dictionary found a boundary
//             // within the expansion, giving two boundaries with the same index in the
//             // original text. Ignore the second. See ticket #12918.
//             --numBreaks;
//         }
//         prevCPPos = cpPos;
//         prevUTextPos = utextPos;
//     }
//     (void)prevCPPos; // suppress compiler warnings about unused variable

//     // inString goes out of scope
//     // inputMap goes out of scope
//     return numBreaks;
// }
// #endif

// U_NAMESPACE_END

// #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* dictionarydata.h
*
* created on: 2012may31
* created by: Markus W. Scherer & Maxime Serrano
*/

// #include "dictionarydata.h"
// #include "unicode/ucharstrie.h"
// #include "unicode/bytestrie.h"
// #include "unicode/udata.h"
// #include "cmemory.h"

// #if !UCONFIG_NO_BREAK_ITERATION

// U_NAMESPACE_BEGIN

// const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;
// const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;
// const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;
// const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;

// const int32_t  DictionaryData::TRANSFORM_NONE = 0;
// const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
// const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
// const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;

// DictionaryMatcher::~DictionaryMatcher() {
// }

// UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
//     udata_close(file);
// }

// int32_t UCharsDictionaryMatcher::getType() const {
//     return DictionaryData::TRIE_TYPE_UCHARS;
// }

// int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
//                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
//                             int32_t *prefix) const {

//     UCharsTrie uct(characters);
//     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
//     int32_t wordCount = 0;
//     int32_t codePointsMatched = 0;

//     for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
//         UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
//         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
//         codePointsMatched += 1;
//         if (USTRINGTRIE_HAS_VALUE(result)) {
//             if (wordCount < limit) {
//                 if (values != NULL) {
//                     values[wordCount] = uct.getValue();
//                 }
//                 if (lengths != NULL) {
//                     lengths[wordCount] = lengthMatched;
//                 }
//                 if (cpLengths != NULL) {
//                     cpLengths[wordCount] = codePointsMatched;
//                 }
//                 ++wordCount;
//             }
//             if (result == USTRINGTRIE_FINAL_VALUE) {
//                 break;
//             }
//         }
//         else if (result == USTRINGTRIE_NO_MATCH) {
//             break;
//         }
//         if (lengthMatched >= maxLength) {
//             break;
//         }
//     }

//     if (prefix != NULL) {
//         *prefix = codePointsMatched;
//     }
//     return wordCount;
// }

// BytesDictionaryMatcher::~BytesDictionaryMatcher() {
//     udata_close(file);
// }

// UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
//     if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
//         if (c == 0x200D) {
//             return 0xFF;
//         } else if (c == 0x200C) {
//             return 0xFE;
//         }
//         int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
//         if (delta < 0 || 0xFD < delta) {
//             return U_SENTINEL;
//         }
//         return (UChar32)delta;
//     }
//     return c;
// }

// int32_t BytesDictionaryMatcher::getType() const {
//     return DictionaryData::TRIE_TYPE_BYTES;
// }

// int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
//                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
//                             int32_t *prefix) const {
//     BytesTrie bt(characters);
//     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
//     int32_t wordCount = 0;
//     int32_t codePointsMatched = 0;

//     for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
//         UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
//         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
//         codePointsMatched += 1;
//         if (USTRINGTRIE_HAS_VALUE(result)) {
//             if (wordCount < limit) {
//                 if (values != NULL) {
//                     values[wordCount] = bt.getValue();
//                 }
//                 if (lengths != NULL) {
//                     lengths[wordCount] = lengthMatched;
//                 }
//                 if (cpLengths != NULL) {
//                     cpLengths[wordCount] = codePointsMatched;
//                 }
//                 ++wordCount;
//             }
//             if (result == USTRINGTRIE_FINAL_VALUE) {
//                 break;
//             }
//         }
//         else if (result == USTRINGTRIE_NO_MATCH) {
//             break;
//         }
//         if (lengthMatched >= maxLength) {
//             break;
//         }
//     }

//     if (prefix != NULL) {
//         *prefix = codePointsMatched;
//     }
//     return wordCount;
// }


// U_NAMESPACE_END

// U_NAMESPACE_USE

// U_CAPI int32_t U_EXPORT2
// udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
//            void *outData, UErrorCode *pErrorCode) {
//     const UDataInfo *pInfo;
//     int32_t headerSize;
//     const uint8_t *inBytes;
//     uint8_t *outBytes;
//     const int32_t *inIndexes;
//     int32_t indexes[DictionaryData::IX_COUNT];
//     int32_t i, offset, size;

//     headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
//     if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
//     pInfo = (const UDataInfo *)((const char *)inData + 4);
//     if (!(pInfo->dataFormat[0] == 0x44 &&
//           pInfo->dataFormat[1] == 0x69 &&
//           pInfo->dataFormat[2] == 0x63 &&
//           pInfo->dataFormat[3] == 0x74 &&
//           pInfo->formatVersion[0] == 1)) {
//         udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
//                          pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
//         *pErrorCode = U_UNSUPPORTED_ERROR;
//         return 0;
//     }

//     inBytes = (const uint8_t *)inData + headerSize;
//     outBytes = (uint8_t *)outData + headerSize;

//     inIndexes = (const int32_t *)inBytes;
//     if (length >= 0) {
//         length -= headerSize;
//         if (length < (int32_t)(sizeof(indexes))) {
//             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
//             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
//             return 0;
//         }
//     }

//     for (i = 0; i < DictionaryData::IX_COUNT; i++) {
//         indexes[i] = udata_readInt32(ds, inIndexes[i]);
//     }

//     size = indexes[DictionaryData::IX_TOTAL_SIZE];

//     if (length >= 0) {
//         if (length < size) {
//             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
//             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
//             return 0;
//         }

//         if (inBytes != outBytes) {
//             uprv_memcpy(outBytes, inBytes, size);
//         }

//         offset = 0;
//         ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
//         offset = (int32_t)sizeof(indexes);
//         int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
//         int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];

//         if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
//             ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
//         } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
//             // nothing to do
//         } else {
//             udata_printError(ds, "udict_swap(): unknown trie type!\n");
//             *pErrorCode = U_UNSUPPORTED_ERROR;
//             return 0;
//         }

//         // these next two sections are empty in the current format,
//         // but may be used later.
//         offset = nextOffset;
//         nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
//         offset = nextOffset;
//         nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
//         offset = nextOffset;
//     }
//     return headerSize + size;
// }
// #endif
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*******************************************************************************
* Copyright (C) 2008, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File DTINTRV.CPP 
*
*******************************************************************************
*/



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2008-2009, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File DTINTRV.H 
*
*******************************************************************************
*/




#if U_SHOW_CPLUSPLUS_API



/**
 * \file
 * \brief C++ API: Date Interval data type
 */

U_NAMESPACE_BEGIN


/**
 * This class represents a date interval.
 * It is a pair of UDate representing from UDate 1 to UDate 2.
 * @stable ICU 4.0
**/
class U_COMMON_API DateInterval : public UObject {
public:

    /** 
     * Construct a DateInterval given a from date and a to date.
     * @param fromDate  The from date in date interval.
     * @param toDate    The to date in date interval.
     * @stable ICU 4.0
     */
    DateInterval(UDate fromDate, UDate toDate);

    /**
     * destructor
     * @stable ICU 4.0
     */
    virtual ~DateInterval();
 
    /** 
     * Get the from date.
     * @return  the from date in dateInterval.
     * @stable ICU 4.0
     */
    inline UDate getFromDate() const;

    /** 
     * Get the to date.
     * @return  the to date in dateInterval.
     * @stable ICU 4.0
     */
    inline UDate getToDate() const;


    /**
     * Return the class ID for this class. This is useful only for comparing to
     * a return value from getDynamicClassID(). For example:
     * <pre>
     * .   Base* polymorphic_pointer = createPolymorphicObject();
     * .   if (polymorphic_pointer->getDynamicClassID() ==
     * .       derived::getStaticClassID()) ...
     * </pre>
     * @return          The class ID for all objects of this class.
     * @stable ICU 4.0
     */
    static UClassID U_EXPORT2 getStaticClassID(void);

    /**
     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
     * method is to implement a simple version of RTTI, since not all C++
     * compilers support genuine RTTI. Polymorphic operator==() and clone()
     * methods call this method.
     *
     * @return          The class ID for this object. All objects of a
     *                  given class have the same class ID.  Objects of
     *                  other classes have different class IDs.
     * @stable ICU 4.0
     */
    virtual UClassID getDynamicClassID(void) const;

    
    /**
     * Copy constructor.
     * @stable ICU 4.0
     */
    DateInterval(const DateInterval& other);

    /**
     * Default assignment operator
     * @stable ICU 4.0
     */
    DateInterval& operator=(const DateInterval&);

    /**
     * Equality operator.
     * @return TRUE if the two DateIntervals are the same
     * @stable ICU 4.0
     */
    virtual UBool operator==(const DateInterval& other) const;

    /**
     * Non-equality operator
     * @return TRUE if the two DateIntervals are not the same
     * @stable ICU 4.0
     */
    inline UBool operator!=(const DateInterval& other) const;


    /**
     * clone this object. 
     * The caller owns the result and should delete it when done.
     * @return a cloned DateInterval
     * @stable ICU 4.0
     */
     virtual DateInterval* clone() const;

private:
    /** 
     * Default constructor, not implemented.
     */
    DateInterval();

    UDate fromDate;
    UDate toDate;

} ;// end class DateInterval


inline UDate 
DateInterval::getFromDate() const { 
    return fromDate; 
}


inline UDate 
DateInterval::getToDate() const { 
    return toDate; 
}


inline UBool 
DateInterval::operator!=(const DateInterval& other) const { 
    return ( !operator==(other) );
}


U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */




U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)

//DateInterval::DateInterval(){}


DateInterval::DateInterval(UDate from, UDate to)
:   fromDate(from),
    toDate(to)
{}


DateInterval::~DateInterval(){}


DateInterval::DateInterval(const DateInterval& other)
: UObject(other) {
    *this = other;
}   


DateInterval&
DateInterval::operator=(const DateInterval& other) {
    if ( this != &other ) {
        fromDate = other.fromDate;
        toDate = other.toDate;
    }
    return *this;
}


DateInterval* 
DateInterval::clone() const {
    return new DateInterval(*this);
}


UBool 
DateInterval::operator==(const DateInterval& other) const { 
    return ( fromDate == other.fromDate && toDate == other.toDate );
}


U_NAMESPACE_END

// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// edits.cpp
// created: 2017feb08 Markus W. Scherer






// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (c) 2001-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/19/2001  aliu        Creation.
 **********************************************************************
 */






//--------------------------------------------------------------------
// class ICU_Utility
// i18n utility functions, scoped into the class ICU_Utility.
//--------------------------------------------------------------------

U_NAMESPACE_BEGIN

class UnicodeMatcher;

class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
 public:

    /**
     * Append a number to the given UnicodeString in the given radix.
     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
     * radices 11 through 36.
     * @param result the digits of the number are appended here
     * @param n the number to be converted to digits; may be negative.
     * If negative, a '-' is prepended to the digits.
     * @param radix a radix from 2 to 36 inclusive.
     * @param minDigits the minimum number of digits, not including
     * any '-', to produce.  Values less than 2 have no effect.  One
     * digit is always emitted regardless of this parameter.
     * @return a reference to result
     */
    static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
                                       int32_t radix = 10,
                                       int32_t minDigits = 1);

    /** Returns a bogus UnicodeString by value. */
    static inline UnicodeString makeBogusString() {
        UnicodeString result;
        result.setToBogus();
        return result;
    }

    /**
     * Return true if the character is NOT printable ASCII.
     *
     * This method should really be in UnicodeString (or similar).  For
     * now, we implement it here and share it with friend classes.
     */
    static UBool isUnprintable(UChar32 c);

    /**
     * Escape unprintable characters using \uxxxx notation for U+0000 to
     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
     * printable ASCII, then do nothing and return FALSE.  Otherwise,
     * append the escaped notation and return TRUE.
     */
    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);

    /**
     * Returns the index of a character, ignoring quoted text.
     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
     * found by a search for 'h'.
     * @param text text to be searched
     * @param start the beginning index, inclusive; <code>0 <= start
     * <= limit</code>.
     * @param limit the ending index, exclusive; <code>start <= limit
     * <= text.length()</code>.
     * @param c character to search for
     * @return Offset of the first instance of c, or -1 if not found.
     */
//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
//    static int32_t quotedIndexOf(const UnicodeString& text,
//                                 int32_t start, int32_t limit,
//                                 UChar c);

    /**
     * Skip over a sequence of zero or more white space characters at pos.
     * @param advance if true, advance pos to the first non-white-space
     * character at or after pos, or str.length(), if there is none.
     * Otherwise leave pos unchanged.
     * @return the index of the first non-white-space character at or
     * after pos, or str.length(), if there is none.
     */
    static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
                                  UBool advance = FALSE);

    /**
     * Skip over Pattern_White_Space in a Replaceable.
     * Skipping may be done in the forward or
     * reverse direction.  In either case, the leftmost index will be
     * inclusive, and the rightmost index will be exclusive.  That is,
     * given a range defined as [start, limit), the call
     * skipWhitespace(text, start, limit) will advance start past leading
     * whitespace, whereas the call skipWhitespace(text, limit, start),
     * will back up limit past trailing whitespace.
     * @param text the text to be analyzed
     * @param pos either the start or limit of a range of 'text', to skip
     * leading or trailing whitespace, respectively
     * @param stop either the limit or start of a range of 'text', to skip
     * leading or trailing whitespace, respectively
     * @return the new start or limit, depending on what was passed in to
     * 'pos'
     */
//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
//?    static int32_t skipWhitespace(const Replaceable& text,
//?                                  int32_t pos, int32_t stop);

    /**
     * Parse a single non-whitespace character 'ch', optionally
     * preceded by whitespace.
     * @param id the string to be parsed
     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
     * offset of the first character to be parsed.  On output, pos[0]
     * is the index after the last parsed character.  If the parse
     * fails, pos[0] will be unchanged.
     * @param ch the non-whitespace character to be parsed.
     * @return true if 'ch' is seen preceded by zero or more
     * whitespace characters.
     */
    static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);

    /**
     * Parse a pattern string starting at offset pos.  Keywords are
     * matched case-insensitively.  Spaces may be skipped and may be
     * optional or required.  Integer values may be parsed, and if
     * they are, they will be returned in the given array.  If
     * successful, the offset of the next non-space character is
     * returned.  On failure, -1 is returned.
     * @param pattern must only contain lowercase characters, which
     * will match their uppercase equivalents as well.  A space
     * character matches one or more required spaces.  A '~' character
     * matches zero or more optional spaces.  A '#' character matches
     * an integer and stores it in parsedInts, which the caller must
     * ensure has enough capacity.
     * @param parsedInts array to receive parsed integers.  Caller
     * must ensure that parsedInts.length is >= the number of '#'
     * signs in 'pattern'.
     * @return the position after the last character parsed, or -1 if
     * the parse failed
     */
    static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
                                const UnicodeString& pattern, int32_t* parsedInts);
        
    /**
     * Parse a pattern string within the given Replaceable and a parsing
     * pattern.  Characters are matched literally and case-sensitively
     * except for the following special characters:
     *
     * ~  zero or more Pattern_White_Space chars
     *
     * If end of pattern is reached with all matches along the way,
     * pos is advanced to the first unparsed index and returned.
     * Otherwise -1 is returned.
     * @param pat pattern that controls parsing
     * @param text text to be parsed, starting at index
     * @param index offset to first character to parse
     * @param limit offset after last character to parse
     * @return index after last parsed character, or -1 on parse failure.
     */
    static int32_t parsePattern(const UnicodeString& pat,
                                const Replaceable& text,
                                int32_t index,
                                int32_t limit);

    /**
     * Parse an integer at pos, either of the form \d+ or of the form
     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
     * or octal format.
     * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
     * character to parse.  On output, the index of the character after the
     * last parsed character.
     */
    static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);

    /**
     * Parse an integer at pos using only ASCII digits.
     * Base 10 only.
     * @param pos INPUT-OUTPUT parameter.  On input, the index of the first
     * character to parse.  On output, the index of the character after the
     * last parsed character.
     */
    static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);

    /**
     * Parse a Unicode identifier from the given string at the given
     * position.  Return the identifier, or an empty string if there
     * is no identifier.
     * @param str the string to parse
     * @param pos INPUT-OUPUT parameter.  On INPUT, pos is the
     * first character to examine.  It must be less than str.length(),
     * and it must not point to a whitespace character.  That is, must
     * have pos < str.length() and
     * !UCharacter::isWhitespace(str.char32At(pos)).  On
     * OUTPUT, the position after the last parsed character.
     * @return the Unicode identifier, or an empty string if there is
     * no valid identifier at pos.
     */
    static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);

    /**
     * Parse an unsigned 31-bit integer at the given offset.  Use
     * UCharacter.digit() to parse individual characters into digits.
     * @param text the text to be parsed
     * @param pos INPUT-OUTPUT parameter.  On entry, pos is the
     * offset within text at which to start parsing; it should point
     * to a valid digit.  On exit, pos is the offset after the last
     * parsed character.  If the parse failed, it will be unchanged on
     * exit.  Must be >= 0 on entry.
     * @param radix the radix in which to parse; must be >= 2 and <=
     * 36.
     * @return a non-negative parsed number, or -1 upon parse failure.
     * Parse fails if there are no digits, that is, if pos does not
     * point to a valid digit on entry, or if the number to be parsed
     * does not fit into a 31-bit unsigned integer.
     */
    static int32_t parseNumber(const UnicodeString& text,
                               int32_t& pos, int8_t radix);

    static void appendToRule(UnicodeString& rule,
                             UChar32 c,
                             UBool isLiteral,
                             UBool escapeUnprintable,
                             UnicodeString& quoteBuf);
    
    static void appendToRule(UnicodeString& rule,
                             const UnicodeString& text,
                             UBool isLiteral,
                             UBool escapeUnprintable,
                             UnicodeString& quoteBuf);

    static void appendToRule(UnicodeString& rule,
                             const UnicodeMatcher* matcher,
                             UBool escapeUnprintable,
                             UnicodeString& quoteBuf);

private:
    // do not instantiate
    ICU_Utility();
};

U_NAMESPACE_END



U_NAMESPACE_BEGIN

namespace {

// 0000uuuuuuuuuuuu records u+1 unchanged text units.
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;

// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
const int32_t MAX_SHORT_CHANGE = 0x6fff;

// 0111mmmmmmnnnnnn records a replacement of m text units with n.
// m or n = 61: actual length follows in the next edits array unit.
// m or n = 62..63: actual length follows in the next two edits array units.
// Bit 30 of the actual length is in the head unit.
// Trailing units have bit 15 set.
const int32_t LENGTH_IN_1TRAIL = 61;
const int32_t LENGTH_IN_2TRAIL = 62;

}  // namespace

void Edits::releaseArray() U_NOEXCEPT {
    if (array != stackArray) {
        uprv_free(array);
    }
}

Edits &Edits::copyArray(const Edits &other) {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    if (length > capacity) {
        uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
        if (newArray == nullptr) {
            length = delta = numChanges = 0;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        releaseArray();
        array = newArray;
        capacity = length;
    }
    if (length > 0) {
        uprv_memcpy(array, other.array, (size_t)length * 2);
    }
    return *this;
}

Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    releaseArray();
    if (length > STACK_CAPACITY) {
        array = src.array;
        capacity = src.capacity;
        src.array = src.stackArray;
        src.capacity = STACK_CAPACITY;
        src.reset();
        return *this;
    }
    array = stackArray;
    capacity = STACK_CAPACITY;
    if (length > 0) {
        uprv_memcpy(array, src.array, (size_t)length * 2);
    }
    return *this;
}

Edits &Edits::operator=(const Edits &other) {
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
    errorCode_ = other.errorCode_;
    return copyArray(other);
}

Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
    length = src.length;
    delta = src.delta;
    numChanges = src.numChanges;
    errorCode_ = src.errorCode_;
    return moveArray(src);
}

Edits::~Edits() {
    releaseArray();
}

void Edits::reset() U_NOEXCEPT {
    length = delta = numChanges = 0;
    errorCode_ = U_ZERO_ERROR;
}

void Edits::addUnchanged(int32_t unchangedLength) {
    if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
    if(unchangedLength < 0) {
        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    // Merge into previous unchanged-text record, if any.
    int32_t last = lastUnit();
    if(last < MAX_UNCHANGED) {
        int32_t remaining = MAX_UNCHANGED - last;
        if (remaining >= unchangedLength) {
            setLastUnit(last + unchangedLength);
            return;
        }
        setLastUnit(MAX_UNCHANGED);
        unchangedLength -= remaining;
    }
    // Split large lengths into multiple units.
    while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
        append(MAX_UNCHANGED);
        unchangedLength -= MAX_UNCHANGED_LENGTH;
    }
    // Write a small (remaining) length.
    if(unchangedLength > 0) {
        append(unchangedLength - 1);
    }
}

void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    if(U_FAILURE(errorCode_)) { return; }
    if(oldLength < 0 || newLength < 0) {
        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if (oldLength == 0 && newLength == 0) {
        return;
    }
    ++numChanges;
    int32_t newDelta = newLength - oldLength;
    if (newDelta != 0) {
        if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
                (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
            // Integer overflow or underflow.
            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        delta += newDelta;
    }

    if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
            newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
        // Merge into previous same-lengths short-replacement record, if any.
        int32_t u = (oldLength << 12) | (newLength << 9);
        int32_t last = lastUnit();
        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
                (last & ~SHORT_CHANGE_NUM_MASK) == u &&
                (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
            setLastUnit(last + 1);
            return;
        }
        append(u);
        return;
    }

    int32_t head = 0x7000;
    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
        head |= oldLength << 6;
        head |= newLength;
        append(head);
    } else if ((capacity - length) >= 5 || growArray()) {
        int32_t limit = length + 1;
        if(oldLength < LENGTH_IN_1TRAIL) {
            head |= oldLength << 6;
        } else if(oldLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL << 6;
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        } else {
            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
            array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | oldLength);
        }
        if(newLength < LENGTH_IN_1TRAIL) {
            head |= newLength;
        } else if(newLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL;
            array[limit++] = (uint16_t)(0x8000 | newLength);
        } else {
            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
            array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
            array[limit++] = (uint16_t)(0x8000 | newLength);
        }
        array[length] = (uint16_t)head;
        length = limit;
    }
}

void Edits::append(int32_t r) {
    if(length < capacity || growArray()) {
        array[length++] = (uint16_t)r;
    }
}

UBool Edits::growArray() {
    int32_t newCapacity;
    if (array == stackArray) {
        newCapacity = 2000;
    } else if (capacity == INT32_MAX) {
        // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
        // with a result-string-buffer overflow.
        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    } else if (capacity >= (INT32_MAX / 2)) {
        newCapacity = INT32_MAX;
    } else {
        newCapacity = 2 * capacity;
    }
    // Grow by at least 5 units so that a maximal change record will fit.
    if ((newCapacity - capacity) < 5) {
        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
    if (newArray == NULL) {
        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    uprv_memcpy(newArray, array, (size_t)length * 2);
    releaseArray();
    array = newArray;
    capacity = newCapacity;
    return TRUE;
}

UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
    if (U_FAILURE(outErrorCode)) { return TRUE; }
    if (U_SUCCESS(errorCode_)) { return FALSE; }
    outErrorCode = errorCode_;
    return TRUE;
}

Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
    if (copyErrorTo(errorCode)) { return *this; }
    // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
    // Parallel iteration over both Edits.
    Iterator abIter = ab.getFineIterator();
    Iterator bcIter = bc.getFineIterator();
    UBool abHasNext = TRUE, bcHasNext = TRUE;
    // Copy iterator state into local variables, so that we can modify and subdivide spans.
    // ab old & new length, bc old & new length
    int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
    // When we have different-intermediate-length changes, we accumulate a larger change.
    int32_t pending_aLength = 0, pending_cLength = 0;
    for (;;) {
        // At this point, for each of the two iterators:
        // Either we are done with the locally cached current edit,
        // and its intermediate-string length has been reset,
        // or we will continue to work with a truncated remainder of this edit.
        //
        // If the current edit is done, and the iterator has not yet reached the end,
        // then we fetch the next edit. This is true for at least one of the iterators.
        //
        // Normally it does not matter whether we fetch from ab and then bc or vice versa.
        // However, the result is observably different when
        // ab deletions meet bc insertions at the same intermediate-string index.
        // Some users expect the bc insertions to come first, so we fetch from bc first.
        if (bc_bLength == 0) {
            if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
                bc_bLength = bcIter.oldLength();
                cLength = bcIter.newLength();
                if (bc_bLength == 0) {
                    // insertion
                    if (ab_bLength == 0 || !abIter.hasChange()) {
                        addReplace(pending_aLength, pending_cLength + cLength);
                        pending_aLength = pending_cLength = 0;
                    } else {
                        pending_cLength += cLength;
                    }
                    continue;
                }
            }
            // else see if the other iterator is done, too.
        }
        if (ab_bLength == 0) {
            if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
                aLength = abIter.oldLength();
                ab_bLength = abIter.newLength();
                if (ab_bLength == 0) {
                    // deletion
                    if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
                        addReplace(pending_aLength + aLength, pending_cLength);
                        pending_aLength = pending_cLength = 0;
                    } else {
                        pending_aLength += aLength;
                    }
                    continue;
                }
            } else if (bc_bLength == 0) {
                // Both iterators are done at the same time:
                // The intermediate-string lengths match.
                break;
            } else {
                // The ab output string is shorter than the bc input string.
                if (!copyErrorTo(errorCode)) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                }
                return *this;
            }
        }
        if (bc_bLength == 0) {
            // The bc input string is shorter than the ab output string.
            if (!copyErrorTo(errorCode)) {
                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            }
            return *this;
        }
        //  Done fetching: ab_bLength > 0 && bc_bLength > 0

        // The current state has two parts:
        // - Past: We accumulate a longer ac edit in the "pending" variables.
        // - Current: We have copies of the current ab/bc edits in local variables.
        //   At least one side is newly fetched.
        //   One side might be a truncated remainder of an edit we fetched earlier.

        if (!abIter.hasChange() && !bcIter.hasChange()) {
            // An unchanged span all the way from string a to string c.
            if (pending_aLength != 0 || pending_cLength != 0) {
                addReplace(pending_aLength, pending_cLength);
                pending_aLength = pending_cLength = 0;
            }
            int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
            addUnchanged(unchangedLength);
            ab_bLength = aLength -= unchangedLength;
            bc_bLength = cLength -= unchangedLength;
            // At least one of the unchanged spans is now empty.
            continue;
        }
        if (!abIter.hasChange() && bcIter.hasChange()) {
            // Unchanged a->b but changed b->c.
            if (ab_bLength >= bc_bLength) {
                // Split the longer unchanged span into change + remainder.
                addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
                pending_aLength = pending_cLength = 0;
                aLength = ab_bLength -= bc_bLength;
                bc_bLength = 0;
                continue;
            }
            // Handle the shorter unchanged span below like a change.
        } else if (abIter.hasChange() && !bcIter.hasChange()) {
            // Changed a->b and then unchanged b->c.
            if (ab_bLength <= bc_bLength) {
                // Split the longer unchanged span into change + remainder.
                addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
                pending_aLength = pending_cLength = 0;
                cLength = bc_bLength -= ab_bLength;
                ab_bLength = 0;
                continue;
            }
            // Handle the shorter unchanged span below like a change.
        } else {  // both abIter.hasChange() && bcIter.hasChange()
            if (ab_bLength == bc_bLength) {
                // Changes on both sides up to the same position. Emit & reset.
                addReplace(pending_aLength + aLength, pending_cLength + cLength);
                pending_aLength = pending_cLength = 0;
                ab_bLength = bc_bLength = 0;
                continue;
            }
        }
        // Accumulate the a->c change, reset the shorter side,
        // keep a remainder of the longer one.
        pending_aLength += aLength;
        pending_cLength += cLength;
        if (ab_bLength < bc_bLength) {
            bc_bLength -= ab_bLength;
            cLength = ab_bLength = 0;
        } else {  // ab_bLength > bc_bLength
            ab_bLength -= bc_bLength;
            aLength = bc_bLength = 0;
        }
    }
    if (pending_aLength != 0 || pending_cLength != 0) {
        addReplace(pending_aLength, pending_cLength);
    }
    copyErrorTo(errorCode);
    return *this;
}

Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
        array(a), index(0), length(len), remaining(0),
        onlyChanges_(oc), coarse(crs),
        dir(0), changed(FALSE), oldLength_(0), newLength_(0),
        srcIndex(0), replIndex(0), destIndex(0) {}

int32_t Edits::Iterator::readLength(int32_t head) {
    if (head < LENGTH_IN_1TRAIL) {
        return head;
    } else if (head < LENGTH_IN_2TRAIL) {
        U_ASSERT(index < length);
        U_ASSERT(array[index] >= 0x8000);
        return array[index++] & 0x7fff;
    } else {
        U_ASSERT((index + 2) <= length);
        U_ASSERT(array[index] >= 0x8000);
        U_ASSERT(array[index + 1] >= 0x8000);
        int32_t len = ((head & 1) << 30) |
                ((int32_t)(array[index] & 0x7fff) << 15) |
                (array[index + 1] & 0x7fff);
        index += 2;
        return len;
    }
}

void Edits::Iterator::updateNextIndexes() {
    srcIndex += oldLength_;
    if (changed) {
        replIndex += newLength_;
    }
    destIndex += newLength_;
}

void Edits::Iterator::updatePreviousIndexes() {
    srcIndex -= oldLength_;
    if (changed) {
        replIndex -= newLength_;
    }
    destIndex -= newLength_;
}

UBool Edits::Iterator::noNext() {
    // No change before or beyond the string.
    dir = 0;
    changed = FALSE;
    oldLength_ = newLength_ = 0;
    return FALSE;
}

UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    // Forward iteration: Update the string indexes to the limit of the current span,
    // and post-increment-read array units to assemble a new span.
    // Leaves the array index one after the last unit of that span.
    if (U_FAILURE(errorCode)) { return FALSE; }
    // We have an errorCode in case we need to start guarding against integer overflows.
    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    if (dir > 0) {
        updateNextIndexes();
    } else {
        if (dir < 0) {
            // Turn around from previous() to next().
            // Post-increment-read the same span again.
            if (remaining > 0) {
                // Fine-grained iterator:
                // Stay on the current one of a sequence of compressed changes.
                ++index;  // next() rests on the index after the sequence unit.
                dir = 1;
                return TRUE;
            }
        }
        dir = 1;
    }
    if (remaining >= 1) {
        // Fine-grained iterator: Continue a sequence of compressed changes.
        if (remaining > 1) {
            --remaining;
            return TRUE;
        }
        remaining = 0;
    }
    if (index >= length) {
        return noNext();
    }
    int32_t u = array[index++];
    if (u <= MAX_UNCHANGED) {
        // Combine adjacent unchanged ranges.
        changed = FALSE;
        oldLength_ = u + 1;
        while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
            ++index;
            oldLength_ += u + 1;
        }
        newLength_ = oldLength_;
        if (onlyChanges) {
            updateNextIndexes();
            if (index >= length) {
                return noNext();
            }
            // already fetched u > MAX_UNCHANGED at index
            ++index;
        } else {
            return TRUE;
        }
    }
    changed = TRUE;
    if (u <= MAX_SHORT_CHANGE) {
        int32_t oldLen = u >> 12;
        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
        if (coarse) {
            oldLength_ = num * oldLen;
            newLength_ = num * newLen;
        } else {
            // Split a sequence of changes that was compressed into one unit.
            oldLength_ = oldLen;
            newLength_ = newLen;
            if (num > 1) {
                remaining = num;  // This is the first of two or more changes.
            }
            return TRUE;
        }
    } else {
        U_ASSERT(u <= 0x7fff);
        oldLength_ = readLength((u >> 6) & 0x3f);
        newLength_ = readLength(u & 0x3f);
        if (!coarse) {
            return TRUE;
        }
    }
    // Combine adjacent changes.
    while (index < length && (u = array[index]) > MAX_UNCHANGED) {
        ++index;
        if (u <= MAX_SHORT_CHANGE) {
            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
            oldLength_ += (u >> 12) * num;
            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
        } else {
            U_ASSERT(u <= 0x7fff);
            oldLength_ += readLength((u >> 6) & 0x3f);
            newLength_ += readLength(u & 0x3f);
        }
    }
    return TRUE;
}

UBool Edits::Iterator::previous(UErrorCode &errorCode) {
    // Backward iteration: Pre-decrement-read array units to assemble a new span,
    // then update the string indexes to the start of that span.
    // Leaves the array index on the head unit of that span.
    if (U_FAILURE(errorCode)) { return FALSE; }
    // We have an errorCode in case we need to start guarding against integer overflows.
    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    if (dir >= 0) {
        if (dir > 0) {
            // Turn around from next() to previous().
            // Set the string indexes to the span limit and
            // pre-decrement-read the same span again.
            if (remaining > 0) {
                // Fine-grained iterator:
                // Stay on the current one of a sequence of compressed changes.
                --index;  // previous() rests on the sequence unit.
                dir = -1;
                return TRUE;
            }
            updateNextIndexes();
        }
        dir = -1;
    }
    if (remaining > 0) {
        // Fine-grained iterator: Continue a sequence of compressed changes.
        int32_t u = array[index];
        U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
        if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
            ++remaining;
            updatePreviousIndexes();
            return TRUE;
        }
        remaining = 0;
    }
    if (index <= 0) {
        return noNext();
    }
    int32_t u = array[--index];
    if (u <= MAX_UNCHANGED) {
        // Combine adjacent unchanged ranges.
        changed = FALSE;
        oldLength_ = u + 1;
        while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
            --index;
            oldLength_ += u + 1;
        }
        newLength_ = oldLength_;
        // No need to handle onlyChanges as long as previous() is called only from findIndex().
        updatePreviousIndexes();
        return TRUE;
    }
    changed = TRUE;
    if (u <= MAX_SHORT_CHANGE) {
        int32_t oldLen = u >> 12;
        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
        if (coarse) {
            oldLength_ = num * oldLen;
            newLength_ = num * newLen;
        } else {
            // Split a sequence of changes that was compressed into one unit.
            oldLength_ = oldLen;
            newLength_ = newLen;
            if (num > 1) {
                remaining = 1;  // This is the last of two or more changes.
            }
            updatePreviousIndexes();
            return TRUE;
        }
    } else {
        if (u <= 0x7fff) {
            // The change is encoded in u alone.
            oldLength_ = readLength((u >> 6) & 0x3f);
            newLength_ = readLength(u & 0x3f);
        } else {
            // Back up to the head of the change, read the lengths,
            // and reset the index to the head again.
            U_ASSERT(index > 0);
            while ((u = array[--index]) > 0x7fff) {}
            U_ASSERT(u > MAX_SHORT_CHANGE);
            int32_t headIndex = index++;
            oldLength_ = readLength((u >> 6) & 0x3f);
            newLength_ = readLength(u & 0x3f);
            index = headIndex;
        }
        if (!coarse) {
            updatePreviousIndexes();
            return TRUE;
        }
    }
    // Combine adjacent changes.
    while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
        --index;
        if (u <= MAX_SHORT_CHANGE) {
            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
            oldLength_ += (u >> 12) * num;
            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
        } else if (u <= 0x7fff) {
            // Read the lengths, and reset the index to the head again.
            int32_t headIndex = index++;
            oldLength_ += readLength((u >> 6) & 0x3f);
            newLength_ += readLength(u & 0x3f);
            index = headIndex;
        }
    }
    updatePreviousIndexes();
    return TRUE;
}

int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return -1; }
    int32_t spanStart, spanLength;
    if (findSource) {  // find source index
        spanStart = srcIndex;
        spanLength = oldLength_;
    } else {  // find destination index
        spanStart = destIndex;
        spanLength = newLength_;
    }
    if (i < spanStart) {
        if (i >= (spanStart / 2)) {
            // Search backwards.
            for (;;) {
                UBool hasPrevious = previous(errorCode);
                U_ASSERT(hasPrevious);  // because i>=0 and the first span starts at 0
                (void)hasPrevious;  // avoid unused-variable warning
                spanStart = findSource ? srcIndex : destIndex;
                if (i >= spanStart) {
                    // The index is in the current span.
                    return 0;
                }
                if (remaining > 0) {
                    // Is the index in one of the remaining compressed edits?
                    // spanStart is the start of the current span, first of the remaining ones.
                    spanLength = findSource ? oldLength_ : newLength_;
                    int32_t u = array[index];
                    U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
                    int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
                    int32_t len = num * spanLength;
                    if (i >= (spanStart - len)) {
                        int32_t n = ((spanStart - i - 1) / spanLength) + 1;
                        // 1 <= n <= num
                        srcIndex -= n * oldLength_;
                        replIndex -= n * newLength_;
                        destIndex -= n * newLength_;
                        remaining += n;
                        return 0;
                    }
                    // Skip all of these edits at once.
                    srcIndex -= num * oldLength_;
                    replIndex -= num * newLength_;
                    destIndex -= num * newLength_;
                    remaining = 0;
                }
            }
        }
        // Reset the iterator to the start.
        dir = 0;
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (spanStart + spanLength)) {
        // The index is in the current span.
        return 0;
    }
    while (next(FALSE, errorCode)) {
        if (findSource) {
            spanStart = srcIndex;
            spanLength = oldLength_;
        } else {
            spanStart = destIndex;
            spanLength = newLength_;
        }
        if (i < (spanStart + spanLength)) {
            // The index is in the current span.
            return 0;
        }
        if (remaining > 1) {
            // Is the index in one of the remaining compressed edits?
            // spanStart is the start of the current span, first of the remaining ones.
            int32_t len = remaining * spanLength;
            if (i < (spanStart + len)) {
                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
                srcIndex += n * oldLength_;
                replIndex += n * newLength_;
                destIndex += n * newLength_;
                remaining -= n;
                return 0;
            }
            // Make next() skip all of these edits at once.
            oldLength_ *= remaining;
            newLength_ *= remaining;
            remaining = 0;
        }
    }
    return 1;
}

int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
    int32_t where = findIndex(i, TRUE, errorCode);
    if (where < 0) {
        // Error or before the string.
        return 0;
    }
    if (where > 0 || i == srcIndex) {
        // At or after string length, or at start of the found span.
        return destIndex;
    }
    if (changed) {
        // In a change span, map to its end.
        return destIndex + newLength_;
    } else {
        // In an unchanged span, offset 1:1 within it.
        return destIndex + (i - srcIndex);
    }
}

int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
    int32_t where = findIndex(i, FALSE, errorCode);
    if (where < 0) {
        // Error or before the string.
        return 0;
    }
    if (where > 0 || i == destIndex) {
        // At or after string length, or at start of the found span.
        return srcIndex;
    }
    if (changed) {
        // In a change span, map to its end.
        return srcIndex + oldLength_;
    } else {
        // In an unchanged span, offset within it.
        return srcIndex + (i - destIndex);
    }
}

UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
    sb.append(u"{ src[", -1);
    ICU_Utility::appendNumber(sb, srcIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);
    if (changed) {
        sb.append(u"] ⇝ dest[", -1);
    } else {
        sb.append(u"] ≡ dest[", -1);
    }
    ICU_Utility::appendNumber(sb, destIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, destIndex + newLength_);
    if (changed) {
        sb.append(u"], repl[", -1);
        ICU_Utility::appendNumber(sb, replIndex);
        sb.append(u"..", -1);
        ICU_Utility::appendNumber(sb, replIndex + newLength_);
        sb.append(u"] }", -1);
    } else {
        sb.append(u"] (no-change) }", -1);
    }
    return sb;
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  errorcode.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009mar10
*   created by: Markus W. Scherer
*/


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  errorcode.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009mar10
*   created by: Markus W. Scherer
*/


/**
 * \file 
 * \brief C++ API: ErrorCode class intended to make it easier to use
 *                 ICU C and C++ APIs from C++ user code.
 */



#if U_SHOW_CPLUSPLUS_API



U_NAMESPACE_BEGIN

/**
 * Wrapper class for UErrorCode, with conversion operators for direct use
 * in ICU C and C++ APIs.
 * Intended to be used as a base class, where a subclass overrides
 * the handleFailure() function so that it throws an exception,
 * does an assert(), logs an error, etc.
 * This is not an abstract base class. This class can be used and instantiated
 * by itself, although it will be more useful when subclassed.
 *
 * Features:
 * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
 *   removing one common source of errors.
 * - Same use in C APIs taking a UErrorCode * (pointer)
 *   and C++ taking UErrorCode & (reference) via conversion operators.
 * - Possible automatic checking for success when it goes out of scope.
 *
 * Note: For automatic checking for success in the destructor, a subclass
 * must implement such logic in its own destructor because the base class
 * destructor cannot call a subclass function (like handleFailure()).
 * The ErrorCode base class destructor does nothing.
 *
 * Note also: While it is possible for a destructor to throw an exception,
 * it is generally unsafe to do so. This means that in a subclass the destructor
 * and the handleFailure() function may need to take different actions.
 *
 * Sample code:
 * \code
 *   class IcuErrorCode: public icu::ErrorCode {
 *   public:
 *     virtual ~IcuErrorCode() {  // should be defined in .cpp as "key function"
 *       // Safe because our handleFailure() does not throw exceptions.
 *       if(isFailure()) { handleFailure(); }
 *     }
 *   protected:
 *     virtual void handleFailure() const {
 *       log_failure(u_errorName(errorCode));
 *       exit(errorCode);
 *     }
 *   };
 *   IcuErrorCode error_code;
 *   UConverter *cnv = ucnv_open("Shift-JIS", error_code);
 *   length = ucnv_fromUChars(dest, capacity, src, length, error_code);
 *   ucnv_close(cnv);
 *   // IcuErrorCode destructor checks for success.
 * \endcode
 *
 * @stable ICU 4.2
 */
class U_COMMON_API ErrorCode: public UMemory {
public:
    /**
     * Default constructor. Initializes its UErrorCode to U_ZERO_ERROR.
     * @stable ICU 4.2
     */
    ErrorCode() : errorCode(U_ZERO_ERROR) {}
    /** Destructor, does nothing. See class documentation for details. @stable ICU 4.2 */
    virtual ~ErrorCode();
    /** Conversion operator, returns a reference. @stable ICU 4.2 */
    operator UErrorCode & () { return errorCode; }
    /** Conversion operator, returns a pointer. @stable ICU 4.2 */
    operator UErrorCode * () { return &errorCode; }
    /** Tests for U_SUCCESS(). @stable ICU 4.2 */
    UBool isSuccess() const { return U_SUCCESS(errorCode); }
    /** Tests for U_FAILURE(). @stable ICU 4.2 */
    UBool isFailure() const { return U_FAILURE(errorCode); }
    /** Returns the UErrorCode value. @stable ICU 4.2 */
    UErrorCode get() const { return errorCode; }
    /** Sets the UErrorCode value. @stable ICU 4.2 */
    void set(UErrorCode value) { errorCode=value; }
    /** Returns the UErrorCode value and resets it to U_ZERO_ERROR. @stable ICU 4.2 */
    UErrorCode reset();
    /**
     * Asserts isSuccess().
     * In other words, this method checks for a failure code,
     * and the base class handles it like this:
     * \code
     *   if(isFailure()) { handleFailure(); }
     * \endcode
     * @stable ICU 4.4
     */
    void assertSuccess() const;
    /**
     * Return a string for the UErrorCode value.
     * The string will be the same as the name of the error code constant
     * in the UErrorCode enum.
     * @stable ICU 4.4
     */
    const char* errorName() const;

protected:
    /**
     * Internal UErrorCode, accessible to subclasses.
     * @stable ICU 4.2
     */
    UErrorCode errorCode;
    /**
     * Called by assertSuccess() if isFailure() is true.
     * A subclass should override this function to deal with a failure code:
     * Throw an exception, log an error, terminate the program, or similar.
     * @stable ICU 4.2
     */
    virtual void handleFailure() const {}
};

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */



U_NAMESPACE_BEGIN

ErrorCode::~ErrorCode() {}

UErrorCode ErrorCode::reset() {
    UErrorCode code = errorCode;
    errorCode = U_ZERO_ERROR;
    return code;
}

void ErrorCode::assertSuccess() const {
    if(isFailure()) {
        handleFailure();
    }
}

const char* ErrorCode::errorName() const {
  return u_errorName(errorCode);
}

U_NAMESPACE_END
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/


// #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION

// #include "cmemory.h"

// #include "unicode/filteredbrk.h"
// #include "unicode/ucharstriebuilder.h"
// #include "unicode/ures.h"

// #include "uresimp.h" // ures_getByKeyWithFallback
// #include "ubrkimpl.h" // U_ICUDATA_BRKITR
// #include "uvector.h"
// #include "cmemory.h"

// U_NAMESPACE_BEGIN

// #ifndef FB_DEBUG
// #define FB_DEBUG 0
// #endif

// #if FB_DEBUG
// #include <stdio.h>
// static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
//   char buf[2048];
//   if(s) {
//     s->extract(0,s->length(),buf,2048);
//   } else {
//     strcpy(buf,"NULL");
//   }
//   fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
//           f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
// }

// #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
// #else
// #define FB_TRACE(m,s,b,d)
// #endif

// /**
//  * Used with sortedInsert()
//  */
// static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
//     const UnicodeString &a = *(const UnicodeString*)t1.pointer;
//     const UnicodeString &b = *(const UnicodeString*)t2.pointer;
//     return a.compare(b);
// }

// /**
//  * A UVector which implements a set of strings.
//  */
// class U_COMMON_API UStringSet : public UVector {
//  public:
//   UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
//                                            uhash_compareUnicodeString,
//                                            1,
//                                            status) {}
//   virtual ~UStringSet();
//   /**
//    * Is this UnicodeSet contained?
//    */
//   inline UBool contains(const UnicodeString& s) {
//     return contains((void*) &s);
//   }
//   using UVector::contains;
//   /**
//    * Return the ith UnicodeString alias
//    */
//   inline const UnicodeString* getStringAt(int32_t i) const {
//     return (const UnicodeString*)elementAt(i);
//   }
//   /**
//    * Adopt the UnicodeString if not already contained.
//    * Caller no longer owns the pointer in any case.
//    * @return true if adopted successfully, false otherwise (error, or else duplicate)
//    */
//   inline UBool adopt(UnicodeString *str, UErrorCode &status) {
//     if(U_FAILURE(status) || contains(*str)) {
//       delete str;
//       return false;
//     } else {
//       sortedInsert(str, compareUnicodeString, status);
//       if(U_FAILURE(status)) {
//         delete str;
//         return false;
//       }
//       return true;
//     }
//   }
//   /**
//    * Add by value.
//    * @return true if successfully adopted.
//    */
//   inline UBool add(const UnicodeString& str, UErrorCode &status) {
//     if(U_FAILURE(status)) return false;
//     UnicodeString *t = new UnicodeString(str);
//     if(t==NULL) {
//       status = U_MEMORY_ALLOCATION_ERROR; return false;
//     }
//     return adopt(t, status);
//   }
//   /**
//    * Remove this string.
//    * @return true if successfully removed, false otherwise (error, or else it wasn't there)
//    */
//   inline UBool remove(const UnicodeString &s, UErrorCode &status) {
//     if(U_FAILURE(status)) return false;
//     return removeElement((void*) &s);
//   }
// };

// /**
//  * Virtual, won't be inlined
//  */
// UStringSet::~UStringSet() {}

// /* ----------------------------------------------------------- */


// /* Filtered Break constants */
// static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
// static const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
// static const int32_t kSuppressInReverse = (1<<0);
// static const int32_t kAddToForward = (1<<1);
// static const UChar   kFULLSTOP = 0x002E; // '.'

// /**
//  * Shared data for SimpleFilteredSentenceBreakIterator
//  */
// class SimpleFilteredSentenceBreakData : public UMemory {
// public:
//   SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
//       : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
//   SimpleFilteredSentenceBreakData *incr() { refcount++;  return this; }
//   SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
//   virtual ~SimpleFilteredSentenceBreakData();

//   LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
//   LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
//   int32_t                     refcount;
// };

// SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}

// /**
//  * Concrete implementation
//  */
// class SimpleFilteredSentenceBreakIterator : public BreakIterator {
// public:
//   SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
//   SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
//   virtual ~SimpleFilteredSentenceBreakIterator();
// private:
//   SimpleFilteredSentenceBreakData *fData;
//   LocalPointer<BreakIterator> fDelegate;
//   LocalUTextPointer           fText;

//   /* -- subclass interface -- */
// public:
//   /* -- cloning and other subclass stuff -- */
//   virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
//                                              int32_t &/*BufferSize*/,
//                                              UErrorCode &status) {
//     // for now - always deep clone
//     status = U_SAFECLONE_ALLOCATED_WARNING;
//     return clone();
//   }
//   virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
//   virtual UClassID getDynamicClassID(void) const { return NULL; }
//   virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }

//   /* -- text modifying -- */
//   virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
//   virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
//   virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
//   virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }

//   /* -- other functions that are just delegated -- */
//   virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
//   virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }

//   /* -- ITERATION -- */
//   virtual int32_t first(void);
//   virtual int32_t preceding(int32_t offset);
//   virtual int32_t previous(void);
//   virtual UBool isBoundary(int32_t offset);
//   virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.

//   virtual int32_t next(void);

//   virtual int32_t next(int32_t n);
//   virtual int32_t following(int32_t offset);
//   virtual int32_t last(void);

// private:
//     /**
//      * Given that the fDelegate has already given its "initial" answer,
//      * find the NEXT actual (non-excepted) break.
//      * @param n initial position from delegate
//      * @return new break position or UBRK_DONE
//      */
//     int32_t internalNext(int32_t n);
//     /**
//      * Given that the fDelegate has already given its "initial" answer,
//      * find the PREV actual (non-excepted) break.
//      * @param n initial position from delegate
//      * @return new break position or UBRK_DONE
//      */
//     int32_t internalPrev(int32_t n);
//     /**
//      * set up the UText with the value of the fDelegate.
//      * Call this before calling breakExceptionAt.
//      * May be able to avoid excess calls
//      */
//     void resetState(UErrorCode &status);
//     /**
//      * Is there a match  (exception) at this spot?
//      */
//     enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
//     /**
//      * Determine if there is an exception at this spot
//      * @param n spot to check
//      * @return kNoExceptionHere or kExceptionHere
//      **/
//     enum EFBMatchResult breakExceptionAt(int32_t n);
// };

// SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
//   : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
// {
// }


// SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
//   BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
//   fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
//   fDelegate(adopt)
// {
//   // all set..
// }

// SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
//     fData = fData->decr();
// }

// void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
//   fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
// }

// SimpleFilteredSentenceBreakIterator::EFBMatchResult
// SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
//     int64_t bestPosn = -1;
//     int32_t bestValue = -1;
//     // loops while 'n' points to an exception.
//     utext_setNativeIndex(fText.getAlias(), n); // from n..
//     fData->fBackwardsTrie->reset();
//     UChar32 uch;

//     //if(debug2) u_printf(" n@ %d\n", n);
//     // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
//     if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) {  // TODO: skip a class of chars here??
//       // TODO only do this the 1st time?
//       //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
//     } else {
//       //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
//       uch = utext_next32(fText.getAlias());
//       //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
//     }

//     UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;

//     while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&   // more to consume backwards and..
//           USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
//       if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
//         bestPosn = utext_getNativeIndex(fText.getAlias());
//         bestValue = fData->fBackwardsTrie->getValue();
//       }
//       //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
//     }

//     if(USTRINGTRIE_MATCHES(r)) { // exact match?
//       //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//       bestValue = fData->fBackwardsTrie->getValue();
//       bestPosn = utext_getNativeIndex(fText.getAlias());
//       //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//     }

//     if(bestPosn>=0) {
//       //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);

//       //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
//       //int32_t bestValue = fBackwardsTrie->getValue();
//       ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (UChar)uch, r, bestValue);

//       if(bestValue == kMATCH) { // exact match!
//         //if(debug2) u_printf(" exact backward match\n");
//         return kExceptionHere; // See if the next is another exception.
//       } else if(bestValue == kPARTIAL
//                 && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
//         //if(debug2) u_printf(" partial backward match\n");
//         // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
//         // to see if it matches something going forward.
//         fData->fForwardsPartialTrie->reset();
//         UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
//         utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//         //if(debug2) u_printf("Retrying at %d\n", bestPosn);
//         while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
//               USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
//           //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
//         }
//         if(USTRINGTRIE_MATCHES(rfwd)) {
//           //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
//           // only full matches here, nothing to check
//           // skip the next:
//             return kExceptionHere;
//         } else {
//           //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
//           // no match (no exception) -return the 'underlying' break
//           return kNoExceptionHere;
//         }
//       } else {
//         return kNoExceptionHere; // internal error and/or no forwards trie
//       }
//     } else {
//       //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r);  // no best match
//       return kNoExceptionHere; // No match - so exit. Not an exception.
//     }
// }

// // the workhorse single next.
// int32_t
// SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
//   if(n == UBRK_DONE || // at end  or
//     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
//       return n;
//   }
//   // OK, do we need to break here?
//   UErrorCode status = U_ZERO_ERROR;
//   // refresh text
//   resetState(status);
//   if(U_FAILURE(status)) return UBRK_DONE; // bail out
//   int64_t utextLen = utext_nativeLength(fText.getAlias());

//   //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
//   while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
//     SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);

//     switch(m) {
//     case kExceptionHere:
//       n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
//       continue;

//     default:
//     case kNoExceptionHere:
//       return n;
//     }
//   }
//   return n;
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
//   if(n == 0 || n == UBRK_DONE || // at end  or
//     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
//       return n;
//   }
//   // OK, do we need to break here?
//   UErrorCode status = U_ZERO_ERROR;
//   // refresh text
//   resetState(status);
//   if(U_FAILURE(status)) return UBRK_DONE; // bail out

//   //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
//   while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
//     SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);

//     switch(m) {
//     case kExceptionHere:
//       n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
//       continue;

//     default:
//     case kNoExceptionHere:
//       return n;
//     }
//   }
//   return n;
// }


// int32_t
// SimpleFilteredSentenceBreakIterator::next() {
//   return internalNext(fDelegate->next());
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::first(void) {
//   // Don't suppress a break opportunity at the beginning of text.
//   return fDelegate->first();
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
//   return internalPrev(fDelegate->preceding(offset));
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::previous(void) {
//   return internalPrev(fDelegate->previous());
// }

// UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
//   if (!fDelegate->isBoundary(offset)) return false; // no break to suppress

//   if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions

//   UErrorCode status = U_ZERO_ERROR;
//   resetState(status);

//   SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);

//   switch(m) {
//   case kExceptionHere:
//     return false;
//   default:
//   case kNoExceptionHere:
//     return true;
//   }
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
//   return internalNext(fDelegate->next(offset));
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
//   return internalNext(fDelegate->following(offset));
// }

// int32_t
// SimpleFilteredSentenceBreakIterator::last(void) {
//   // Don't suppress a break opportunity at the end of text.
//   return fDelegate->last();
// }


// /**
//  * Concrete implementation of builder class.
//  */
// class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
// public:
//   virtual ~SimpleFilteredBreakIteratorBuilder();
//   SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
//   SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
//   virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
//   virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
//   virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
// private:
//   UStringSet fSet;
// };

// SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
// {
// }

// SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
//   : fSet(status)
// {
// }

// SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
//   : fSet(status)
// {
//   if(U_SUCCESS(status)) {
//     UErrorCode subStatus = U_ZERO_ERROR;
//     LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
//       status = subStatus; // copy the failing status
// #if FB_DEBUG
//       fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
// #endif
//       return;  // leaves the builder empty, if you try to use it.
//     }
//     LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
//       status = subStatus; // copy the failing status
// #if FB_DEBUG
//       fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
// #endif
//       return;  // leaves the builder empty, if you try to use it.
//     }
//     LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));

// #if FB_DEBUG
//     {
//       UErrorCode subsub = subStatus;
//       fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
//     }
// #endif

//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
//       status = subStatus; // copy the failing status
// #if FB_DEBUG
//       fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
// #endif
//       return;  // leaves the builder empty, if you try to use it.
//     }

//     LocalUResourceBundlePointer strs;
//     subStatus = status; // Pick up inherited warning status now
//     do {
//       strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
//       if(strs.isValid() && U_SUCCESS(subStatus)) {
//         UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
//         suppressBreakAfter(str, status); // load the string
//       }
//     } while (strs.isValid() && U_SUCCESS(subStatus));
//     if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
//       status = subStatus;
//     }
//   }
// }

// UBool
// SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
// {
//   UBool r = fSet.add(exception, status);
//   FB_TRACE("suppressBreakAfter",&exception,r,0);
//   return r;
// }

// UBool
// SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
// {
//   UBool r = fSet.remove(exception, status);
//   FB_TRACE("unsuppressBreakAfter",&exception,r,0);
//   return r;
// }

// /**
//  * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
//  * Work around this.
//  *
//  * Note: "new UnicodeString[subCount]" ends up calling global operator new
//  * on MSVC2012 for some reason.
//  */
// static inline UnicodeString* newUnicodeStringArray(size_t count) {
//     return new UnicodeString[count ? count : 1];
// }

// BreakIterator *
// SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
//   LocalPointer<BreakIterator> adopt(adoptBreakIterator);

//   LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
//   LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
//   if(U_FAILURE(status)) {
//     return NULL;
//   }

//   int32_t revCount = 0;
//   int32_t fwdCount = 0;

//   int32_t subCount = fSet.size();

//   UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);

//   LocalArray<UnicodeString> ustrs(ustrs_ptr);

//   LocalMemory<int> partials;
//   partials.allocateInsteadAndReset(subCount);

//   LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
//   LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."

//   int n=0;
//   for ( int32_t i = 0;
//         i<fSet.size();
//         i++) {
//     const UnicodeString *abbr = fSet.getStringAt(i);
//     if(abbr) {
//       FB_TRACE("build",abbr,TRUE,i);
//       ustrs[n] = *abbr; // copy by value
//       FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
//     } else {
//       FB_TRACE("build",abbr,FALSE,i);
//       status = U_MEMORY_ALLOCATION_ERROR;
//       return NULL;
//     }
//     partials[n] = 0; // default: not partial
//     n++;
//   }
//   // first pass - find partials.
//   for(int i=0;i<subCount;i++) {
//     int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
//     if(nn>-1 && (nn+1)!=ustrs[i].length()) {
//       FB_TRACE("partial",&ustrs[i],FALSE,i);
//       // is partial.
//       // is it unique?
//       int sameAs = -1;
//       for(int j=0;j<subCount;j++) {
//         if(j==i) continue;
//         if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
//           FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
//           //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
//           if(partials[j]==0) { // hasn't been processed yet
//             partials[j] = kSuppressInReverse | kAddToForward;
//             FB_TRACE("suppressing",&ustrs[j],FALSE,j);
//           } else if(partials[j] & kSuppressInReverse) {
//             sameAs = j; // the other entry is already in the reverse table.
//           }
//         }
//       }
//       FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
//       FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
//       UnicodeString prefix(ustrs[i], 0, nn+1);
//       if(sameAs == -1 && partials[i] == 0) {
//         // first one - add the prefix to the reverse table.
//         prefix.reverse();
//         builder->add(prefix, kPARTIAL, status);
//         revCount++;
//         FB_TRACE("Added partial",&prefix,FALSE, i);
//         FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
//         partials[i] = kSuppressInReverse | kAddToForward;
//       } else {
//         FB_TRACE("NOT adding partial",&prefix,FALSE, i);
//         FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
//       }
//     }
//   }
//   for(int i=0;i<subCount;i++) {
//     if(partials[i]==0) {
//       ustrs[i].reverse();
//       builder->add(ustrs[i], kMATCH, status);
//       revCount++;
//       FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
//     } else {
//       FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);

//       // an optimization would be to only add the portion after the '.'
//       // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
//       // instead of "Ph.D." since we already know the "Ph." part is a match.
//       // would need the trie to be able to hold 0-length strings, though.
//       builder2->add(ustrs[i], kMATCH, status); // forward
//       fwdCount++;
//       //ustrs[i].reverse();
//       ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
//     }
//   }
//   FB_TRACE("AbbrCount",NULL,FALSE, subCount);

//   if(revCount>0) {
//     backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
//     if(U_FAILURE(status)) {
//       FB_TRACE(u_errorName(status),NULL,FALSE, -1);
//       return NULL;
//     }
//   }

//   if(fwdCount>0) {
//     forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
//     if(U_FAILURE(status)) {
//       FB_TRACE(u_errorName(status),NULL,FALSE, -1);
//       return NULL;
//     }
//   }

//   return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
// }


// // ----------- Base class implementation

// FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
// }

// FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
// }

// FilteredBreakIteratorBuilder *
// FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
//   if(U_FAILURE(status)) return NULL;
//   LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
//   return (U_SUCCESS(status))? ret.orphan(): NULL;
// }

// FilteredBreakIteratorBuilder *
// FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
//   return createEmptyInstance(status);
// }

// FilteredBreakIteratorBuilder *
// FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
//   if(U_FAILURE(status)) return NULL;
//   LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
//   return (U_SUCCESS(status))? ret.orphan(): NULL;
// }

// U_NAMESPACE_END

// #endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  filterednormalizer2.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009dec10
*   created by: Markus W. Scherer
*/



#if !UCONFIG_NO_NORMALIZATION







// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  cpputils.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*/






/*==========================================================================*/
/* Array copy utility functions */
/*==========================================================================*/

// static
// inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
// { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }

// static
// inline void uprv_arrayCopy(const double* src, int32_t srcStart,
//               double* dst, int32_t dstStart, int32_t count)
// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

static
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
    { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }

// static
// inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
//               int8_t* dst, int32_t dstStart, int32_t count)
// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

// static
// inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
// { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }

// static
// inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
//               int16_t* dst, int32_t dstStart, int32_t count)
// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

static
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }

// static
// inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
//               int32_t* dst, int32_t dstStart, int32_t count)
// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

// static
// inline void
// uprv_arrayCopy(const UChar *src, int32_t srcStart,
//         UChar *dst, int32_t dstStart, int32_t count)
// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

/**
 * Copy an array of UnicodeString OBJECTS (not pointers).
 * @internal
 */
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
{ while(count-- > 0) *dst++ = *src++; }

/**
 * Copy an array of UnicodeString OBJECTS (not pointers).
 * @internal
 */
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
               icu::UnicodeString *dst, int32_t dstStart, int32_t count)
{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }

/**
 * Checks that the string is readable and writable.
 * Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer().
 */
inline void
uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode) && s.isBogus()) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    }
}



U_NAMESPACE_BEGIN

FilteredNormalizer2::~FilteredNormalizer2() {}

UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(src, errorCode);
    if(U_FAILURE(errorCode)) {
        dest.setToBogus();
        return dest;
    }
    if(&dest==&src) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return dest;
    }
    dest.remove();
    return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
}

// Internal: No argument checking, and appends to dest.
// Pass as input spanCondition the one that is likely to yield a non-zero
// span length at the start of src.
// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
// USET_SPAN_SIMPLE should be passed in for the start of src
// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
// an in-filter prefix.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               USetSpanCondition spanCondition,
                               UErrorCode &errorCode) const {
    UnicodeString tempDest;  // Don't throw away destination buffer between iterations.
    for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
        int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
        int32_t spanLength=spanLimit-prevSpanLimit;
        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
            if(spanLength!=0) {
                dest.append(src, prevSpanLimit, spanLength);
            }
            spanCondition=USET_SPAN_SIMPLE;
        } else {
            if(spanLength!=0) {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
                                            tempDest, errorCode));
                if(U_FAILURE(errorCode)) {
                    break;
                }
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        prevSpanLimit=spanLimit;
    }
    return dest;
}

void
FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                                   Edits *edits, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
        edits->reset();
    }
    options |= U_EDITS_NO_RESET;  // Do not reset for each span.
    normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode);
}

void
FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
                                   ByteSink &sink, Edits *edits,
                                   USetSpanCondition spanCondition,
                                   UErrorCode &errorCode) const {
    while (length > 0) {
        int32_t spanLength = set.spanUTF8(src, length, spanCondition);
        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
            if (spanLength != 0) {
                if (edits != nullptr) {
                    edits->addUnchanged(spanLength);
                }
                if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
                    sink.Append(src, spanLength);
                }
            }
            spanCondition = USET_SPAN_SIMPLE;
        } else {
            if (spanLength != 0) {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
                if (U_FAILURE(errorCode)) {
                    break;
                }
            }
            spanCondition = USET_SPAN_NOT_CONTAINED;
        }
        src += spanLength;
        length -= spanLength;
    }
}

UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UErrorCode &errorCode) const {
    return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}

UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
                            const UnicodeString &second,
                            UErrorCode &errorCode) const {
    return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}

UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UBool doNormalize,
                                              UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(first, errorCode);
    uprv_checkCanGetBuffer(second, errorCode);
    if(U_FAILURE(errorCode)) {
        return first;
    }
    if(&first==&second) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return first;
    }
    if(first.isEmpty()) {
        if(doNormalize) {
            return normalize(second, first, errorCode);
        } else {
            return first=second;
        }
    }
    // merge the in-filter suffix of the first string with the in-filter prefix of the second
    int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
    if(prefixLimit!=0) {
        UnicodeString prefix(second.tempSubString(0, prefixLimit));
        int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
        if(suffixStart==0) {
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
            } else {
                norm2.append(first, prefix, errorCode);
            }
        } else {
            UnicodeString middle(first, suffixStart, INT32_MAX);
            if(doNormalize) {
                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
            } else {
                norm2.append(middle, prefix, errorCode);
            }
            first.replace(suffixStart, INT32_MAX, middle);
        }
    }
    if(prefixLimit<second.length()) {
        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
        if(doNormalize) {
            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
        } else {
            first.append(rest);
        }
    }
    return first;
}

UBool
FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
    return set.contains(c) && norm2.getDecomposition(c, decomposition);
}

UBool
FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
    return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
}

UChar32
FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
    return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
}

uint8_t
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
    return set.contains(c) ? norm2.getCombiningClass(c) : 0;
}

UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_SIMPLE;
        } else {
            if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
                U_FAILURE(errorCode)
            ) {
                return FALSE;
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        prevSpanLimit=spanLimit;
    }
    return TRUE;
}

UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const char *s = sp.data();
    int32_t length = sp.length();
    USetSpanCondition spanCondition = USET_SPAN_SIMPLE;
    while (length > 0) {
        int32_t spanLength = set.spanUTF8(s, length, spanCondition);
        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
            spanCondition = USET_SPAN_SIMPLE;
        } else {
            if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
                    U_FAILURE(errorCode)) {
                return FALSE;
            }
            spanCondition = USET_SPAN_NOT_CONTAINED;
        }
        s += spanLength;
        length -= spanLength;
    }
    return TRUE;
}

UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    UNormalizationCheckResult result=UNORM_YES;
    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_SIMPLE;
        } else {
            UNormalizationCheckResult qcResult=
                norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
            if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
                return qcResult;
            } else if(qcResult==UNORM_MAYBE) {
                result=qcResult;
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        prevSpanLimit=spanLimit;
    }
    return result;
}

int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_SIMPLE;
        } else {
            int32_t yesLimit=
                prevSpanLimit+
                norm2.spanQuickCheckYes(
                    s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
            if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
                return yesLimit;
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        prevSpanLimit=spanLimit;
    }
    return s.length();
}

UBool
FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
    return !set.contains(c) || norm2.hasBoundaryBefore(c);
}

UBool
FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
    return !set.contains(c) || norm2.hasBoundaryAfter(c);
}

UBool
FilteredNormalizer2::isInert(UChar32 c) const {
    return !set.contains(c) || norm2.isInert(c);
}

U_NAMESPACE_END

// C API ------------------------------------------------------------------- ***

U_NAMESPACE_USE

U_CAPI UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(filterSet==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                             *UnicodeSet::fromUSet(filterSet));
    if(fn2==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UNormalizer2 *)fn2;
}

#endif  // !UCONFIG_NO_NORMALIZATION
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2009-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*/


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*/


/**
 * \file
 * \brief C API: access to ICU Data Version number
 */




/**
 * @stable ICU 49
 */
#define U_ICU_VERSION_BUNDLE "icuver"

/**
 * @stable ICU 49
 */
#define U_ICU_DATA_KEY "DataVersion"

/**
 * Retrieves the data version from icuver and stores it in dataVersionFillin.
 * 
 * @param dataVersionFillin icuver data version information to be filled in if not-null
 * @param status stores the error code from the calls to resource bundle
 * 
 * @stable ICU 49
 */
U_STABLE void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status);



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2000-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/





// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 1999-2016, International Business Machines
*                Corporation and others. All Rights Reserved.
******************************************************************************
*   file name:  uresdata.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999dec08
*   created by: Markus W. Scherer
*   06/24/02    weiv        Added support for resource sharing
*/



// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1999-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  udata.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999oct25
*   created by: Markus W. Scherer
*/





U_CDECL_BEGIN

/**
 * \file
 * \brief C API: Data loading interface
 *
 * <h2>Information about data loading interface</h2>
 *
 * This API is used to find and efficiently load data for ICU and applications
 * using ICU. It provides an abstract interface that specifies a data type and
 * name to find and load the data. Normally this API is used by other ICU APIs
 * to load required data out of the ICU data library, but it can be used to
 * load data out of other places.
 *
 * See the User Guide Data Management chapter.
 */
 
#ifndef U_HIDE_INTERNAL_API
/**
 * Character used to separate package names from tree names 
 * @internal ICU 3.0
 */
#define U_TREE_SEPARATOR '-'

/**
 * String used to separate package names from tree names 
 * @internal ICU 3.0
 */
#define U_TREE_SEPARATOR_STRING "-"

/**
 * Character used to separate parts of entry names
 * @internal ICU 3.0
 */
#define U_TREE_ENTRY_SEP_CHAR '/'

/**
 * String used to separate parts of entry names
 * @internal ICU 3.0
 */
#define U_TREE_ENTRY_SEP_STRING "/"

/**
 * Alias for standard ICU data 
 * @internal ICU 3.0
 */
#define U_ICUDATA_ALIAS "ICUDATA"

#endif /* U_HIDE_INTERNAL_API */

/**
 * UDataInfo contains the properties about the requested data.
 * This is meta data.
 *
 * <p>This structure may grow in the future, indicated by the
 * <code>size</code> field.</p>
 *
 * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
 * The UDataInfo struct begins 4 bytes after the start of the data item,
 * so it is 4-aligned.
 *
 * <p>The platform data property fields help determine if a data
 * file can be efficiently used on a given machine.
 * The particular fields are of importance only if the data
 * is affected by the properties - if there is integer data
 * with word sizes > 1 byte, char* text, or UChar* text.</p>
 *
 * <p>The implementation for the <code>udata_open[Choice]()</code>
 * functions may reject data based on the value in <code>isBigEndian</code>.
 * No other field is used by the <code>udata</code> API implementation.</p>
 *
 * <p>The <code>dataFormat</code> may be used to identify
 * the kind of data, e.g. a converter table.</p>
 *
 * <p>The <code>formatVersion</code> field should be used to
 * make sure that the format can be interpreted.
 * It may be a good idea to check only for the one or two highest
 * of the version elements to allow the data memory to
 * get more or somewhat rearranged contents, for as long
 * as the using code can still interpret the older contents.</p>
 *
 * <p>The <code>dataVersion</code> field is intended to be a
 * common place to store the source version of the data;
 * for data from the Unicode character database, this could
 * reflect the Unicode version.</p>
 *
 * @stable ICU 2.0
 */
typedef struct {
    /** sizeof(UDataInfo)
     *  @stable ICU 2.0 */
    uint16_t size;

    /** unused, set to 0 
     *  @stable ICU 2.0*/
    uint16_t reservedWord;

    /* platform data properties */
    /** 0 for little-endian machine, 1 for big-endian
     *  @stable ICU 2.0 */
    uint8_t isBigEndian;

    /** see U_CHARSET_FAMILY values in utypes.h 
     *  @stable ICU 2.0*/
    uint8_t charsetFamily;

    /** sizeof(UChar), one of { 1, 2, 4 } 
     *  @stable ICU 2.0*/
    uint8_t sizeofUChar;

    /** unused, set to 0 
     *  @stable ICU 2.0*/
    uint8_t reservedByte;

    /** data format identifier 
     *  @stable ICU 2.0*/
    uint8_t dataFormat[4];

    /** versions: [0] major [1] minor [2] milli [3] micro 
     *  @stable ICU 2.0*/
    uint8_t formatVersion[4];

    /** versions: [0] major [1] minor [2] milli [3] micro 
     *  @stable ICU 2.0*/
    uint8_t dataVersion[4];
} UDataInfo;

/* API for reading data -----------------------------------------------------*/

/**
 * Forward declaration of the data memory type.
 * @stable ICU 2.0
 */
typedef struct UDataMemory UDataMemory;

/**
 * Callback function for udata_openChoice().
 * @param context parameter passed into <code>udata_openChoice()</code>.
 * @param type The type of the data as passed into <code>udata_openChoice()</code>.
 *             It may be <code>NULL</code>.
 * @param name The name of the data as passed into <code>udata_openChoice()</code>.
 * @param pInfo A pointer to the <code>UDataInfo</code> structure
 *              of data that has been loaded and will be returned
 *              by <code>udata_openChoice()</code> if this function
 *              returns <code>TRUE</code>.
 * @return TRUE if the current data memory is acceptable
 * @stable ICU 2.0
 */
typedef UBool U_CALLCONV
UDataMemoryIsAcceptable(void *context,
                        const char *type, const char *name,
                        const UDataInfo *pInfo);


/**
 * Convenience function.
 * This function works the same as <code>udata_openChoice</code>
 * except that any data that matches the type and name
 * is assumed to be acceptable.
 * @param path Specifies an absolute path and/or a basename for the
 *             finding of the data in the file system.
 *             <code>NULL</code> for ICU data.
 * @param type A string that specifies the type of data to be loaded.
 *             For example, resource bundles are loaded with type "res",
 *             conversion tables with type "cnv".
 *             This may be <code>NULL</code> or empty.
 * @param name A string that specifies the name of the data.
 * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
 * @return A pointer (handle) to a data memory object, or <code>NULL</code>
 *         if an error occurs. Call <code>udata_getMemory()</code>
 *         to get a pointer to the actual data.
 *
 * @see udata_openChoice
 * @stable ICU 2.0
 */
U_STABLE UDataMemory * U_EXPORT2
udata_open(const char *path, const char *type, const char *name,
           UErrorCode *pErrorCode);

/**
 * Data loading function.
 * This function is used to find and load efficiently data for
 * ICU and applications using ICU.
 * It provides an abstract interface that allows to specify a data
 * type and name to find and load the data.
 *
 * <p>The implementation depends on platform properties and user preferences
 * and may involve loading shared libraries (DLLs), mapping
 * files into memory, or fopen()/fread() files.
 * It may also involve using static memory or database queries etc.
 * Several or all data items may be combined into one entity
 * (DLL, memory-mappable file).</p>
 *
 * <p>The data is always preceded by a header that includes
 * a <code>UDataInfo</code> structure.
 * The caller's <code>isAcceptable()</code> function is called to make
 * sure that the data is useful. It may be called several times if it
 * rejects the data and there is more than one location with data
 * matching the type and name.</p>
 *
 * <p>If <code>path==NULL</code>, then ICU data is loaded.
 * Otherwise, it is separated into a basename and a basename-less directory string.
 * The basename is used as the data package name, and the directory is
 * logically prepended to the ICU data directory string.</p>
 *
 * <p>For details about ICU data loading see the User Guide
 * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
 *
 * @param path Specifies an absolute path and/or a basename for the
 *             finding of the data in the file system.
 *             <code>NULL</code> for ICU data.
 * @param type A string that specifies the type of data to be loaded.
 *             For example, resource bundles are loaded with type "res",
 *             conversion tables with type "cnv".
 *             This may be <code>NULL</code> or empty.
 * @param name A string that specifies the name of the data.
 * @param isAcceptable This function is called to verify that loaded data
 *                     is useful for the client code. If it returns FALSE
 *                     for all data items, then <code>udata_openChoice()</code>
 *                     will return with an error.
 * @param context Arbitrary parameter to be passed into isAcceptable.
 * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
 * @return A pointer (handle) to a data memory object, or <code>NULL</code>
 *         if an error occurs. Call <code>udata_getMemory()</code>
 *         to get a pointer to the actual data.
 * @stable ICU 2.0
 */
U_STABLE UDataMemory * U_EXPORT2
udata_openChoice(const char *path, const char *type, const char *name,
                 UDataMemoryIsAcceptable *isAcceptable, void *context,
                 UErrorCode *pErrorCode);

/**
 * Close the data memory.
 * This function must be called to allow the system to
 * release resources associated with this data memory.
 * @param pData The pointer to data memory object
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udata_close(UDataMemory *pData);

/**
 * Get the pointer to the actual data inside the data memory.
 * The data is read-only.
 *
 * ICU data must be at least 8-aligned, and should be 16-aligned.
 *
 * @param pData The pointer to data memory object
 * @stable ICU 2.0
 */
U_STABLE const void * U_EXPORT2
udata_getMemory(UDataMemory *pData);

/**
 * Get the information from the data memory header.
 * This allows to get access to the header containing
 * platform data properties etc. which is not part of
 * the data itself and can therefore not be accessed
 * via the pointer that <code>udata_getMemory()</code> returns.
 *
 * @param pData pointer to the data memory object
 * @param pInfo pointer to a UDataInfo object;
 *              its <code>size</code> field must be set correctly,
 *              typically to <code>sizeof(UDataInfo)</code>.
 *
 * <code>*pInfo</code> will be filled with the UDataInfo structure
 * in the data memory object. If this structure is smaller than
 * <code>pInfo->size</code>, then the <code>size</code> will be
 * adjusted and only part of the structure will be filled.
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);

/**
 * This function bypasses the normal ICU data loading process and
 * allows you to force ICU's system data to come out of a user-specified
 * area in memory.
 *
 * ICU data must be at least 8-aligned, and should be 16-aligned.
 * See http://userguide.icu-project.org/icudata
 *
 * The format of this data is that of the icu common data file, as is
 * generated by the pkgdata tool with mode=common or mode=dll.
 * You can read in a whole common mode file and pass the address to the start of the
 * data, or (with the appropriate link options) pass in the pointer to
 * the data that has been loaded from a dll by the operating system,
 * as shown in this code:
 *
 *       extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
 *        // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
 *       UErrorCode  status = U_ZERO_ERROR;
 *
 *       udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
 *
 * It is important that the declaration be as above. The entry point
 * must not be declared as an extern void*.
 *
 * Starting with ICU 4.4, it is possible to set several data packages,
 * one per call to this function.
 * udata_open() will look for data in the multiple data packages in the order
 * in which they were set.
 * The position of the linked-in or default-name ICU .data package in the
 * search list depends on when the first data item is loaded that is not contained
 * in the already explicitly set packages.
 * If data was loaded implicitly before the first call to this function
 * (for example, via opening a converter, constructing a UnicodeString
 * from default-codepage data, using formatting or collation APIs, etc.),
 * then the default data will be first in the list.
 *
 * This function has no effect on application (non ICU) data.  See udata_setAppData()
 * for similar functionality for application data.
 *
 * @param data pointer to ICU common data
 * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udata_setCommonData(const void *data, UErrorCode *err);


/**
 * This function bypasses the normal ICU data loading process for application-specific
 * data and allows you to force the it to come out of a user-specified
 * pointer.
 *
 * ICU data must be at least 8-aligned, and should be 16-aligned.
 * See http://userguide.icu-project.org/icudata
 *
 * The format of this data is that of the icu common data file, like 'icudt26l.dat'
 * or the corresponding shared library (DLL) file.
 * The application must read in or otherwise construct an image of the data and then
 * pass the address of it to this function.
 *
 *
 * Warning:  setAppData will set a U_USING_DEFAULT_WARNING code if
 *           data with the specifed path that has already been opened, or
 *           if setAppData with the same path has already been called.
 *           Any such calls to setAppData will have no effect.
 *
 *
 * @param packageName the package name by which the application will refer
 *             to (open) this data
 * @param data pointer to the data
 * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
 * @see udata_setCommonData
 * @stable ICU 2.0
 */
U_STABLE void U_EXPORT2
udata_setAppData(const char *packageName, const void *data, UErrorCode *err);

/**
 * Possible settings for udata_setFileAccess()
 * @see udata_setFileAccess
 * @stable ICU 3.4
 */
typedef enum UDataFileAccess {
    /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
    UDATA_FILES_FIRST,
    /** An alias for the default access mode. @stable ICU 3.4 */
    UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
    /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
    UDATA_ONLY_PACKAGES,
    /** ICU loads data from packages first, and only from single files
        if the data cannot be found in a package. @stable ICU 3.4 */
    UDATA_PACKAGES_FIRST,
    /** ICU does not access the file system for data loading. @stable ICU 3.4 */
    UDATA_NO_FILES,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * Number of real UDataFileAccess values.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UDATA_FILE_ACCESS_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UDataFileAccess;

/**
 * This function may be called to control how ICU loads data. It must be called
 * before any ICU data is loaded, including application data loaded with 
 * ures/ResourceBundle or udata APIs. This function is not multithread safe.  
 * The results of calling it while other threads are loading data are undefined.
 * @param access The type of file access to be used
 * @param status Error code.
 * @see UDataFileAccess
 * @stable ICU 3.4 
 */
U_STABLE void U_EXPORT2
udata_setFileAccess(UDataFileAccess access, UErrorCode *status);

U_CDECL_END

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUDataMemoryPointer
 * "Smart pointer" class, closes a UDataMemory via udata_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);

U_NAMESPACE_END

#endif  // U_SHOW_CPLUSPLUS_API






/**
 * Numeric constants for internal-only types of resource items.
 * These must use different numeric values than UResType constants
 * because they are used together.
 * Internal types are never returned by ures_getType().
 */
typedef enum {
    /** Include a negative value so that the compiler uses the same int type as for UResType. */
    URES_INTERNAL_NONE=-1,

    /** Resource type constant for tables with 32-bit count, key offsets and values. */
    URES_TABLE32=4,

    /**
     * Resource type constant for tables with 16-bit count, key offsets and values.
     * All values are URES_STRING_V2 strings.
     */
    URES_TABLE16=5,

    /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */
    URES_STRING_V2=6,

    /**
     * Resource type constant for arrays with 16-bit count and values.
     * All values are URES_STRING_V2 strings.
     */
    URES_ARRAY16=9

    /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */
} UResInternalType;

/*
 * A Resource is a 32-bit value that has 2 bit fields:
 * 31..28   4-bit type, see enum below
 * 27..0    28-bit four-byte-offset or value according to the type
 */
typedef uint32_t Resource;

#define RES_BOGUS 0xffffffff
#define RES_MAX_OFFSET 0x0fffffff

#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL))
#define RES_GET_OFFSET(res) ((res)&0x0fffffff)
#define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res))

/* get signed and unsigned integer values directly from the Resource handle
 * NOTE: For proper logging, please use the res_getInt() constexpr
 */
#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
#   define RES_GET_INT_NO_TRACE(res) (((int32_t)((res)<<4L))>>4L)
#else
#   define RES_GET_INT_NO_TRACE(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff)
#endif

#define RES_GET_UINT_NO_TRACE(res) ((res)&0x0fffffff)

#define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16)
#define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32)
#define URES_IS_CONTAINER(type) (URES_IS_TABLE(type) || URES_IS_ARRAY(type))

#define URES_MAKE_RESOURCE(type, offset) (((Resource)(type)<<28)|(Resource)(offset))
#define URES_MAKE_EMPTY_RESOURCE(type) ((Resource)(type)<<28)

/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
enum {
    /**
     * [0] contains the length of indexes[]
     * which is at most URES_INDEX_TOP of the latest format version
     *
     * formatVersion==1: all bits contain the length of indexes[]
     *   but the length is much less than 0xff;
     * formatVersion>1:
     *   only bits  7..0 contain the length of indexes[],
     *        bits 31..8 are reserved and set to 0
     * formatVersion>=3:
     *        bits 31..8 poolStringIndexLimit bits 23..0
     */
    URES_INDEX_LENGTH,
    /**
     * [1] contains the top of the key strings,
     *     same as the bottom of resources or UTF-16 strings, rounded up
     */
    URES_INDEX_KEYS_TOP,
    /** [2] contains the top of all resources */
    URES_INDEX_RESOURCES_TOP,
    /**
     * [3] contains the top of the bundle,
     *     in case it were ever different from [2]
     */
    URES_INDEX_BUNDLE_TOP,
    /** [4] max. length of any table */
    URES_INDEX_MAX_TABLE_LENGTH,
    /**
     * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2)
     *
     * formatVersion>=3:
     *   bits 31..16 poolStringIndex16Limit
     *   bits 15..12 poolStringIndexLimit bits 27..24
     */
    URES_INDEX_ATTRIBUTES,
    /**
     * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
     *     rounded up (new in formatVersion 2.0, ICU 4.4)
     */
    URES_INDEX_16BIT_TOP,
    /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
    URES_INDEX_POOL_CHECKSUM,
    URES_INDEX_TOP
};

/*
 * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
 * New in formatVersion 1.2 (ICU 3.6).
 *
 * If set, then this resource bundle is a standalone bundle.
 * If not set, then the bundle participates in locale fallback, eventually
 * all the way to the root bundle.
 * If indexes[] is missing or too short, then the attribute cannot be determined
 * reliably. Dependency checking should ignore such bundles, and loading should
 * use fallbacks.
 */
#define URES_ATT_NO_FALLBACK 1

/*
 * Attributes for bundles that are, or use, a pool bundle.
 * A pool bundle provides key strings that are shared among several other bundles
 * to reduce their total size.
 * New in formatVersion 2 (ICU 4.4).
 */
#define URES_ATT_IS_POOL_BUNDLE 2
#define URES_ATT_USES_POOL_BUNDLE 4

/*
 * File format for .res resource bundle files
 *
 * ICU 56: New in formatVersion 3 compared with 2: -------------
 *
 * Resource bundles can optionally use shared string-v2 values
 * stored in the pool bundle.
 * If so, then the indexes[] contain two new values
 * in previously-unused bits of existing indexes[] slots:
 * - poolStringIndexLimit:
 *     String-v2 offsets (in 32-bit Resource words) below this limit
 *     point to pool bundle string-v2 values.
 * - poolStringIndex16Limit:
 *     Resource16 string-v2 offsets below this limit
 *     point to pool bundle string-v2 values.
 * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit
 *
 * The local bundle's poolStringIndexLimit is greater than
 * any pool bundle string index used in the local bundle.
 * The poolStringIndexLimit should not be greater than
 * the maximum possible pool bundle string index.
 *
 * The maximum possible pool bundle string index is the index to the last non-NUL
 * pool string character, due to suffix sharing.
 *
 * In the pool bundle, there is no structure that lists the strings.
 * (The root resource is an empty Table.)
 * If the strings need to be enumerated (as genrb --usePoolBundle does),
 * then iterate through the pool bundle's 16-bit-units array from the beginning.
 * Stop at the end of the array, or when an explicit or implicit string length
 * would lead beyond the end of the array,
 * or when an apparent string is not NUL-terminated.
 * (Future genrb version might terminate the strings with
 * what looks like a large explicit string length.)
 *
 * ICU 4.4: New in formatVersion 2 compared with 1.3: -------------
 *
 * Three new resource types -- String-v2, Table16 and Array16 -- have their
 * values stored in a new array of 16-bit units between the table key strings
 * and the start of the other resources.
 *
 * genrb eliminates duplicates among Unicode string-v2 values.
 * Multiple Unicode strings may use the same offset and string data,
 * or a short string may point to the suffix of a longer string. ("Suffix sharing")
 * For example, one string "abc" may be reused for another string "bc" by pointing
 * to the second character. (Short strings-v2 are NUL-terminated
 * and not preceded by an explicit length value.)
 *
 * It is allowed for all resource types to share values.
 * The swapper code (ures_swap()) has been modified so that it swaps each item
 * exactly once.
 *
 * A resource bundle may use a special pool bundle. Some or all of the table key strings
 * of the using-bundle are omitted, and the key string offsets for such key strings refer
 * to offsets in the pool bundle.
 * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values
 * must match.
 * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle
 * is or uses a pool bundle.
 *
 * Table key strings must be compared in ASCII order, even if they are not
 * stored in ASCII.
 *
 * New in formatVersion 1.3 compared with 1.2: -------------
 *
 * genrb eliminates duplicates among key strings.
 * Multiple table items may share one key string, or one item may point
 * to the suffix of another's key string. ("Suffix sharing")
 * For example, one key "abc" may be reused for another key "bc" by pointing
 * to the second character. (Key strings are NUL-terminated.)
 *
 * -------------
 *
 * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
 * with nested, hierarchical data structures.
 * It physically contains the following:
 *
 *   Resource root; -- 32-bit Resource item, root item for this bundle's tree;
 *                     currently, the root item must be a table or table32 resource item
 *   int32_t indexes[indexes[0]]; -- array of indexes for friendly
 *                                   reading and swapping; see URES_INDEX_* above
 *                                   new in formatVersion 1.1 (ICU 2.8)
 *   char keys[]; -- characters for key strings
 *                   (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
 *                   (minus the space for root and indexes[]),
 *                   which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
 *                   padded to multiple of 4 bytes for 4-alignment of the following data
 *   uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units
 *                             (new in formatVersion 2/ICU 4.4)
 *                             data is indexed by the offset values in 16-bit resource types,
 *                             with offset 0 pointing to the beginning of this array;
 *                             there is a 0 at offset 0, for empty resources;
 *                             padded to multiple of 4 bytes for 4-alignment of the following data
 *   data; -- data directly and indirectly indexed by the root item;
 *            the structure is determined by walking the tree
 *
 * Each resource bundle item has a 32-bit Resource handle (see typedef above)
 * which contains the item type number in its upper 4 bits (31..28) and either
 * an offset or a direct value in its lower 28 bits (27..0).
 * The order of items is undefined and only determined by walking the tree.
 * Leaves of the tree may be stored first or last or anywhere in between,
 * and it is in theory possible to have unreferenced holes in the file.
 *
 * 16-bit-unit values:
 * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special
 * array of 16-bit units. Each resource value is a sequence of 16-bit units,
 * with no per-resource padding to a 4-byte boundary.
 * 16-bit container types (Table16 and Array16) contain Resource16 values
 * which are offsets to String-v2 resources in the same 16-bit-units array.
 *
 * Direct values:
 * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
 * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for
 *   _any_ resource type indicates an empty value.
 * - Integer values are 28-bit values stored in the Resource handle itself;
 *   the interpretation of unsigned vs. signed integers is up to the application.
 *
 * All other types and values use 28-bit offsets to point to the item's data.
 * The offset is an index to the first 32-bit word of the value, relative to the
 * start of the resource data (i.e., the root item handle is at offset 0).
 * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
 * All resource item values are 4-aligned.
 *
 * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array,
 * indexing 16-bit units in that array.
 *
 * The structures (memory layouts) for the values for each item type are listed
 * in the table below.
 *
 * Nested, hierarchical structures: -------------
 *
 * Table items contain key-value pairs where the keys are offsets to char * key strings.
 * The values of these pairs are either Resource handles or
 * offsets into the 16-bit-units array, depending on the table type.
 *
 * Array items are simple vectors of Resource handles,
 * or of offsets into the 16-bit-units array, depending on the array type.
 *
 * Table key string offsets: -------
 *
 * Key string offsets are relative to the start of the resource data (of the root handle),
 * i.e., the first string has an offset of 4+sizeof(indexes).
 * (After the 4-byte root handle and after the indexes array.)
 *
 * If the resource bundle uses a pool bundle, then some key strings are stored
 * in the pool bundle rather than in the local bundle itself.
 * - In a Table or Table16, the 16-bit key string offset is local if it is
 *   less than indexes[URES_INDEX_KEYS_TOP]<<2.
 *   Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into
 *   the pool bundle key strings.
 * - In a Table32, the 32-bit key string offset is local if it is non-negative.
 *   Otherwise, reset bit 31 to get the pool key string offset.
 *
 * Unlike the local offset, the pool key offset is relative to
 * the start of the key strings, not to the start of the bundle.
 *
 * An alias item is special (and new in ICU 2.4): --------------
 *
 * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
 * another resource bundle's item according to the path in the string.
 * This is used to share items across bundles that are in different lookup/fallback
 * chains (e.g., large collation data among zh_TW and zh_HK).
 * This saves space (for large items) and maintenance effort (less duplication of data).
 *
 * --------------------------------------------------------------------------
 *
 * Resource types:
 *
 * Most resources have their values stored at four-byte offsets from the start
 * of the resource data. These values are at least 4-aligned.
 * Some resource values are stored directly in the offset field of the Resource itself.
 * See UResType in unicode/ures.h for enumeration constants for Resource types.
 *
 * Some resources have their values stored as sequences of 16-bit units,
 * at 2-byte offsets from the start of a contiguous 16-bit-unit array between
 * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4)
 * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources.
 *
 * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2
 * resources, with the offsets relative to the start of the 16-bit-units array.
 * Starting with formatVersion 3/ICU 56, if offset<poolStringIndex16Limit
 * then use the pool bundle's 16-bit-units array,
 * otherwise subtract that limit and use the local 16-bit-units array.
 *
 * Type Name            Memory layout of values
 *                      (in parentheses: scalar, non-offset values)
 *
 * 0  Unicode String:   int32_t length, UChar[length], (UChar)0, (padding)
 *                  or  (empty string ("") if offset==0)
 * 1  Binary:           int32_t length, uint8_t[length], (padding)
 *                      - the start of the bytes is 16-aligned -
 * 2  Table:            uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
 * 3  Alias:            (physically same value layout as string, new in ICU 2.4)
 * 4  Table32:          int32_t count, int32_t keyStringOffsets[count], Resource[count]
 *                      (new in formatVersion 1.1/ICU 2.8)
 * 5  Table16:          uint16_t count, uint16_t keyStringOffsets[count], Resource16[count]
 *                      (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
 * 6  Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar:
 *                      - if first is not a trail surrogate, then the length is implicit
 *                        and u_strlen() needs to be called
 *                      - if first<0xdfef then length=first&0x3ff (and skip first)
 *                      - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar
 *                      - if first==0xdfff then length=((second UChar)<<16) | third UChar
 *                      (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
 *
 *                      Starting with formatVersion 3/ICU 56, if offset<poolStringIndexLimit
 *                      then use the pool bundle's 16-bit-units array,
 *                      otherwise subtract that limit and use the local 16-bit-units array.
 *                      (Note different limits for Resource16 vs. Resource.)
 *
 * 7  Integer:          (28-bit offset is integer value)
 * 8  Array:            int32_t count, Resource[count]
 * 9  Array16:          uint16_t count, Resource16[count]
 *                      (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
 * 14 Integer Vector:   int32_t length, int32_t[length]
 * 15 Reserved:         This value denotes special purpose resources and is for internal use.
 *
 * Note that there are 3 types with data vector values:
 * - Vectors of 8-bit bytes stored as type Binary.
 * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2
 *                     (no value restrictions, all values 0..ffff allowed!).
 * - Vectors of 32-bit words stored as type Integer Vector.
 */

/*
 * Structure for a single, memory-mapped ResourceBundle.
 */
typedef struct ResourceData {
    UDataMemory *data;
    const int32_t *pRoot;
    const uint16_t *p16BitUnits;
    const char *poolBundleKeys;
    Resource rootRes;
    int32_t localKeyLimit;
    const uint16_t *poolBundleStrings;
    int32_t poolStringIndexLimit;
    int32_t poolStringIndex16Limit;
    UBool noFallback; /* see URES_ATT_NO_FALLBACK */
    UBool isPoolBundle;
    UBool usesPoolBundle;
    UBool useNativeStrcmp;
} ResourceData;

/*
 * Read a resource bundle from memory.
 */
U_INTERNAL void U_EXPORT2
res_read(ResourceData *pResData,
         const UDataInfo *pInfo, const void *inBytes, int32_t length,
         UErrorCode *errorCode);

/*
 * Load a resource bundle file.
 * The ResourceData structure must be allocated externally.
 */
U_CFUNC void
res_load(ResourceData *pResData,
         const char *path, const char *name, UErrorCode *errorCode);

/*
 * Release a resource bundle file.
 * This does not release the ResourceData structure itself.
 */
U_CFUNC void
res_unload(ResourceData *pResData);

U_INTERNAL UResType U_EXPORT2
res_getPublicType(Resource res);

///////////////////////////////////////////////////////////////////////////
// To enable tracing, use the inline versions of the res_get* functions. //
///////////////////////////////////////////////////////////////////////////

/*
 * Return a pointer to a zero-terminated, const UChar* string
 * and set its length in *pLength.
 * Returns NULL if not found.
 */
U_INTERNAL const UChar * U_EXPORT2
res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);

U_INTERNAL const uint8_t * U_EXPORT2
res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);

U_INTERNAL const int32_t * U_EXPORT2
res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength);

U_INTERNAL const UChar * U_EXPORT2
res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength);

U_INTERNAL Resource U_EXPORT2
res_getResource(const ResourceData *pResData, const char *key);

U_INTERNAL int32_t U_EXPORT2
res_countArrayItems(const ResourceData *pResData, Resource res);

U_INTERNAL Resource U_EXPORT2
res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexS);

U_INTERNAL Resource U_EXPORT2
res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexS, const char ** key);

U_INTERNAL Resource U_EXPORT2
res_getTableItemByKey(const ResourceData *pResData, Resource table, int32_t *indexS, const char* * key);

/**
 * Iterates over the path and stops when a scalar resource is found.
 * Follows aliases.
 * Modifies the contents of *path (replacing separators with NULs),
 * and also moves *path forward while it finds items.
 *
 * @param path input: "CollationElements/Sequence" or "zoneStrings/3/2" etc.;
 *             output: points to the part that has not yet been processed
 */
U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r,
                                  char** path, const char** key);

#ifdef __cplusplus

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* resource.h
*
* created on: 2015nov04
* created by: Markus W. Scherer
*/


/**
 * \file
 * \brief ICU resource bundle key and value types.
 */

// Note: Ported from ICU4J class UResource and its nested classes,
// but the C++ classes are separate, not nested.

// We use the Resource prefix for C++ classes, as usual.
// The UResource prefix would be used for C types.




// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html




#if U_ENABLE_TRACING

struct UResourceBundle;

U_NAMESPACE_BEGIN

class CharString;

/**
 * Instances of this class store information used to trace reads from resource
 * bundles when ICU is built with --enable-tracing.
 *
 * All arguments of type const UResourceBundle*, const char*, and
 * const ResourceTracer& are stored as pointers. The caller must retain
 * ownership for the lifetime of this ResourceTracer.
 *
 * Exported as U_COMMON_API for Windows because it is a value field
 * in other exported types.
 */
class U_COMMON_API ResourceTracer {
public:
    ResourceTracer() :
        fResB(nullptr),
        fParent(nullptr),
        fKey(nullptr),
        fIndex(-1) {}

    ResourceTracer(const UResourceBundle* resB) :
        fResB(resB),
        fParent(nullptr),
        fKey(nullptr),
        fIndex(-1) {}

    ResourceTracer(const UResourceBundle* resB, const char* key) :
        fResB(resB),
        fParent(nullptr),
        fKey(key),
        fIndex(-1) {}

    ResourceTracer(const UResourceBundle* resB, int32_t index) :
        fResB(resB),
        fParent(nullptr),
        fKey(nullptr),
        fIndex(index) {}

    ResourceTracer(const ResourceTracer& parent, const char* key) :
        fResB(nullptr),
        fParent(&parent),
        fKey(key),
        fIndex(-1) {}

    ResourceTracer(const ResourceTracer& parent, int32_t index) :
        fResB(nullptr),
        fParent(&parent),
        fKey(nullptr),
        fIndex(index) {}

    ~ResourceTracer();

    void trace(const char* type) const;
    void traceOpen() const;

    /**
     * Calls trace() if the resB or parent provided to the constructor was
     * non-null; otherwise, does nothing.
     */
    void maybeTrace(const char* type) const {
        if (fResB || fParent) {
            trace(type);
        }
    }

private:
    const UResourceBundle* fResB;
    const ResourceTracer* fParent;
    const char* fKey;
    int32_t fIndex;

    CharString& getFilePath(CharString& output, UErrorCode& status) const;

    CharString& getResPath(CharString& output, UErrorCode& status) const;
};

/**
 * This class provides methods to trace data file reads when ICU is built
 * with --enable-tracing.
 */
class FileTracer {
public:
    static void traceOpen(const char* path, const char* type, const char* name);

private:
    static void traceOpenDataFile(const char* path, const char* type, const char* name);
    static void traceOpenResFile(const char* path, const char* name);
};

U_NAMESPACE_END

#else // U_ENABLE_TRACING

U_NAMESPACE_BEGIN

/**
 * Default trivial implementation when --enable-tracing is not used.
 */
class U_COMMON_API ResourceTracer {
public:
    ResourceTracer() {}

    ResourceTracer(const void*) {}

    ResourceTracer(const void*, const char*) {}

    ResourceTracer(const void*, int32_t) {}

    ResourceTracer(const ResourceTracer&, const char*) {}

    ResourceTracer(const ResourceTracer&, int32_t) {}

    void trace(const char*) const {}

    void traceOpen() const {}

    void maybeTrace(const char*) const {}
};

/**
 * Default trivial implementation when --enable-tracing is not used.
 */
class FileTracer {
public:
    static void traceOpen(const char*, const char*, const char*) {}
};

U_NAMESPACE_END

#endif // U_ENABLE_TRACING



struct ResourceData;

U_NAMESPACE_BEGIN

class ResourceValue;

// Note: In C++, we use const char * pointers for keys,
// rather than an abstraction like Java UResource.Key.

/**
 * Interface for iterating over a resource bundle array resource.
 */
class U_COMMON_API ResourceArray {
public:
    /** Constructs an empty array object. */
    ResourceArray() : items16(NULL), items32(NULL), length(0) {}

    /** Only for implementation use. @internal */
    ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len,
                  const ResourceTracer& traceInfo) :
            items16(i16), items32(i32), length(len),
            fTraceInfo(traceInfo) {}

    /**
     * @return The number of items in the array resource.
     */
    int32_t getSize() const { return length; }
    /**
     * @param i Array item index.
     * @param value Output-only, receives the value of the i'th item.
     * @return TRUE if i is non-negative and less than getSize().
     */
    UBool getValue(int32_t i, ResourceValue &value) const;

    /** Only for implementation use. @internal */
    uint32_t internalGetResource(const ResourceData *pResData, int32_t i) const;

private:
    const uint16_t *items16;
    const uint32_t *items32;
    int32_t length;
    ResourceTracer fTraceInfo;
};

/**
 * Interface for iterating over a resource bundle table resource.
 */
class U_COMMON_API ResourceTable {
public:
    /** Constructs an empty table object. */
    ResourceTable() : keys16(NULL), keys32(NULL), items16(NULL), items32(NULL), length(0) {}

    /** Only for implementation use. @internal */
    ResourceTable(const uint16_t *k16, const int32_t *k32,
                  const uint16_t *i16, const uint32_t *i32, int32_t len,
                  const ResourceTracer& traceInfo) :
            keys16(k16), keys32(k32), items16(i16), items32(i32), length(len),
            fTraceInfo(traceInfo) {}

    /**
     * @return The number of items in the array resource.
     */
    int32_t getSize() const { return length; }
    /**
     * @param i Table item index.
     * @param key Output-only, receives the key of the i'th item.
     * @param value Output-only, receives the value of the i'th item.
     * @return TRUE if i is non-negative and less than getSize().
     */
    UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;

    /**
     * @param key Key string to find in the table.
     * @param value Output-only, receives the value of the item with that key.
     * @return TRUE if the table contains the key.
     */
    UBool findValue(const char *key, ResourceValue &value) const;

private:
    const uint16_t *keys16;
    const int32_t *keys32;
    const uint16_t *items16;
    const uint32_t *items32;
    int32_t length;
    ResourceTracer fTraceInfo;
};

/**
 * Represents a resource bundle item's value.
 * Avoids object creations as much as possible.
 * Mutable, not thread-safe.
 */
class U_COMMON_API ResourceValue : public UObject {
public:
    virtual ~ResourceValue();

    /**
     * @return ICU resource type, for example, URES_STRING
     */
    virtual UResType getType() const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource.
     *
     * @see ures_getString()
     */
    virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0;

    inline UnicodeString getUnicodeString(UErrorCode &errorCode) const {
        int32_t len = 0;
        const UChar *r = getString(len, errorCode);
        return UnicodeString(TRUE, r, len);
    }

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource.
     */
    virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0;

    inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const {
        int32_t len = 0;
        const UChar *r = getAliasString(len, errorCode);
        return UnicodeString(TRUE, r, len);
    }

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
     *
     * @see ures_getInt()
     */
    virtual int32_t getInt(UErrorCode &errorCode) const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource.
     *
     * @see ures_getUInt()
     */
    virtual uint32_t getUInt(UErrorCode &errorCode) const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource.
     *
     * @see ures_getIntVector()
     */
    virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource.
     *
     * @see ures_getBinary()
     */
    virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource
     */
    virtual ResourceArray getArray(UErrorCode &errorCode) const = 0;

    /**
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not a table resource
     */
    virtual ResourceTable getTable(UErrorCode &errorCode) const = 0;

    /**
     * Is this a no-fallback/no-inheritance marker string?
     * Such a marker is used for
     * CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205}
     * when enumerating tables with fallback from the specific resource bundle to root.
     *
     * @return TRUE if this is a no-inheritance marker string
     */
    virtual UBool isNoInheritanceMarker() const = 0;

    /**
     * Sets the dest strings from the string values in this array resource.
     *
     * @return the number of strings in this array resource.
     *     If greater than capacity, then an overflow error is set.
     *
     * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource
     *     or if any of the array items is not a string
     */
    virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
                                   UErrorCode &errorCode) const = 0;

    /**
     * Same as
     * <pre>
     * if (getType() == URES_STRING) {
     *     return new String[] { getString(); }
     * } else {
     *     return getStringArray();
     * }
     * </pre>
     *
     * Sets U_RESOURCE_TYPE_MISMATCH if this is
     *     neither a string resource nor an array resource containing strings
     * @see getString()
     * @see getStringArray()
     */
    virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
                                                  UErrorCode &errorCode) const = 0;

    /**
     * Same as
     * <pre>
     * if (getType() == URES_STRING) {
     *     return getString();
     * } else {
     *     return getStringArray()[0];
     * }
     * </pre>
     *
     * Sets U_RESOURCE_TYPE_MISMATCH if this is
     *     neither a string resource nor an array resource containing strings
     * @see getString()
     * @see getStringArray()
     */
    virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const = 0;

protected:
    ResourceValue() {}

private:
    ResourceValue(const ResourceValue &);  // no copy constructor
    ResourceValue &operator=(const ResourceValue &);  // no assignment operator
};

/**
 * Sink for ICU resource bundle contents.
 */
class U_COMMON_API ResourceSink : public UObject {
public:
    ResourceSink() {}
    virtual ~ResourceSink();

    /**
     * Called once for each bundle (child-parent-...-root).
     * The value is normally an array or table resource,
     * and implementations of this method normally iterate over the
     * tree of resource items stored there.
     *
     * @param key The key string of the enumeration-start resource.
     *     Empty if the enumeration starts at the top level of the bundle.
     * @param value Call getArray() or getTable() as appropriate.
     *     Then reuse for output values from Array and Table getters.
     * @param noFallback true if the bundle has no parent;
     *     that is, its top-level table has the nofallback attribute,
     *     or it is the root bundle of a locale tree.
     */
    virtual void put(const char *key, ResourceValue &value, UBool noFallback,
                     UErrorCode &errorCode) = 0;

private:
    ResourceSink(const ResourceSink &);  // no copy constructor
    ResourceSink &operator=(const ResourceSink &);  // no assignment operator
};

U_NAMESPACE_END




U_NAMESPACE_BEGIN

inline const UChar* res_getString(const ResourceTracer& traceInfo,
        const ResourceData *pResData, Resource res, int32_t *pLength) {
    traceInfo.trace("string");
    return res_getStringNoTrace(pResData, res, pLength);
}

inline const uint8_t* res_getBinary(const ResourceTracer& traceInfo,
        const ResourceData *pResData, Resource res, int32_t *pLength) {
    traceInfo.trace("binary");
    return res_getBinaryNoTrace(pResData, res, pLength);
}

inline const int32_t* res_getIntVector(const ResourceTracer& traceInfo,
        const ResourceData *pResData, Resource res, int32_t *pLength) {
    traceInfo.trace("intvector");
    return res_getIntVectorNoTrace(pResData, res, pLength);
}

inline int32_t res_getInt(const ResourceTracer& traceInfo, Resource res) {
    traceInfo.trace("int");
    return RES_GET_INT_NO_TRACE(res);
}

inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
    traceInfo.trace("uint");
    return RES_GET_UINT_NO_TRACE(res);
}

class ResourceDataValue : public ResourceValue {
public:
    ResourceDataValue() :
        res(static_cast<Resource>(URES_NONE)),
        fTraceInfo() {}
    virtual ~ResourceDataValue();

    void setData(const ResourceData *data) {
        resData = *data;
    }

    void setResource(Resource r, ResourceTracer&& traceInfo) {
        res = r;
        fTraceInfo = traceInfo;
    }

    const ResourceData &getData() const { return resData; }
    virtual UResType getType() const;
    virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
    virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
    virtual int32_t getInt(UErrorCode &errorCode) const;
    virtual uint32_t getUInt(UErrorCode &errorCode) const;
    virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const;
    virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const;
    virtual ResourceArray getArray(UErrorCode &errorCode) const;
    virtual ResourceTable getTable(UErrorCode &errorCode) const;
    virtual UBool isNoInheritanceMarker() const;
    virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity,
                                   UErrorCode &errorCode) const;
    virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
                                                  UErrorCode &errorCode) const;
    virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;

private:
    // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
    // then remove this value field again and just store a pResData pointer.
    ResourceData resData;
    Resource res;
    ResourceTracer fTraceInfo;
};

U_NAMESPACE_END

#endif  /* __cplusplus */

/**
 * Swap an ICU resource bundle. See udataswp.h.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ures_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode);



#define kRootLocaleName         "root"
#define kPoolBundleName         "pool"

/*
 The default minor version and the version separator must be exactly one
 character long.
*/

#define kDefaultMinorVersion    "0"
#define kVersionSeparator       "."
#define kVersionTag             "Version"

#define MAGIC1 19700503
#define MAGIC2 19641227

#define URES_MAX_ALIAS_LEVEL 256
#define URES_MAX_BUFFER_SIZE 256

#define EMPTY_SET 0x2205

struct UResourceDataEntry;
typedef struct UResourceDataEntry UResourceDataEntry;

/*
 * Note: If we wanted to make this structure smaller, then we could try
 * to use one UResourceDataEntry pointer for fAlias and fPool, with a separate
 * flag to distinguish whether this struct is for a real bundle with a pool,
 * or for an alias entry for which we won't use the pool after loading.
 */
struct UResourceDataEntry {
    char *fName; /* name of the locale for bundle - still to decide whether it is original or fallback */
    char *fPath; /* path to bundle - used for distinguishing between resources with the same name */
    UResourceDataEntry *fParent; /*next resource in fallback chain*/
    UResourceDataEntry *fAlias;
    UResourceDataEntry *fPool;
    ResourceData fData; /* data for low level access */
    char fNameBuffer[3]; /* A small buffer of free space for fName. The free space is due to struct padding. */
    uint32_t fCountExisting; /* how much is this resource used */
    UErrorCode fBogus;
    /* int32_t fHashKey;*/ /* for faster access in the hashtable */
};

#define RES_BUFSIZE 64
#define RES_PATH_SEPARATOR   '/'
#define RES_PATH_SEPARATOR_S   "/"

struct UResourceBundle {
    const char *fKey; /*tag*/
    UResourceDataEntry *fData; /*for low-level access*/
    char *fVersion;
    UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
    char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
    // TODO(ICU-20769): Try to change the by-value fResData into a pointer,
    // with the struct in only one place for each bundle.
    // Also replace class ResourceDataValue.resData with a pResData pointer again.
    ResourceData fResData;
    char fResBuf[RES_BUFSIZE];
    int32_t fResPathLen;
    Resource fRes;
    UBool fHasFallback;
    UBool fIsTopLevel;
    uint32_t fMagic1;   /* For determining if it's a stack object */
    uint32_t fMagic2;   /* For determining if it's a stack object */
    int32_t fIndex;
    int32_t fSize;

    /*const UResourceBundle *fParentRes;*/ /* needed to get the actual locale for a child resource */
};

U_CAPI void U_EXPORT2 ures_initStackObject(UResourceBundle* resB);

#ifdef __cplusplus

U_NAMESPACE_BEGIN

/**
 * \class StackUResourceBundle
 * "Smart pointer" like class, closes a UResourceBundle via ures_close().
 *
 * This code:
 *
 *   StackUResourceBundle bundle;
 *   foo(bundle.getAlias());
 *
 * Is equivalent to this code:
 *
 *   UResourceBundle bundle;
 *   ures_initStackObject(&bundle);
 *   foo(&bundle);
 *   ures_close(&bundle);
 *
 * @see LocalUResourceBundlePointer
 * @internal
 */
class U_COMMON_API StackUResourceBundle {
public:
    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
#endif

    StackUResourceBundle();
    ~StackUResourceBundle();

    UResourceBundle* getAlias() { return &bundle; }

    UResourceBundle& ref() { return bundle; }
    const UResourceBundle& ref() const { return bundle; }

    StackUResourceBundle(const StackUResourceBundle&) = delete;
    StackUResourceBundle& operator=(const StackUResourceBundle&) = delete;

    StackUResourceBundle(StackUResourceBundle&&) = delete;
    StackUResourceBundle& operator=(StackUResourceBundle&&) = delete;

private:
    UResourceBundle bundle;
};

U_NAMESPACE_END

#endif  /* __cplusplus */

/**
 * Opens a resource bundle for the locale;
 * if there is not even a base language bundle, then loads the root bundle;
 * never falls back to the default locale.
 *
 * This is used for algorithms that have good pan-Unicode default behavior,
 * such as case mappings, collation, and segmentation (BreakIterator).
 */
U_CAPI UResourceBundle* U_EXPORT2
ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status);

/* Some getters used by the copy constructor */
U_CFUNC const char* ures_getName(const UResourceBundle* resB);
#ifdef URES_DEBUG
U_CFUNC const char* ures_getPath(const UResourceBundle* resB);
/**
 * If anything was in the RB cache, dump it to the screen.
 * @return TRUE if there was anything into the cache
 */
U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void);
#endif
/*U_CFUNC void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd);*/
/*U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd);*/
/*U_CFUNC void ures_freeResPath(UResourceBundle *resB);*/

/* Candidates for export */
U_CFUNC UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status);

/**
 * Returns a resource that can be located using the pathToResource argument. One needs optional package, locale
 * and path inside the locale, for example: "/myData/en/zoneStrings/3". Keys and indexes are supported. Keys
 * need to reference data in named structures, while indexes can reference both named and anonymous resources.
 * Features a fill-in parameter. 
 * 
 * Note, this function does NOT have a syntax for specifying items within a tree.  May want to consider a
 * syntax that delineates between package/tree and resource.  
 *
 * @param pathToResource    a path that will lead to the requested resource
 * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
 *                          Alternatively, you can supply a struct to be filled by this function.
 * @param status            fills in the outgoing error code.
 * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
 */
U_CAPI UResourceBundle* U_EXPORT2
ures_findResource(const char* pathToResource, 
                  UResourceBundle *fillIn, UErrorCode *status); 

/**
 * Returns a sub resource that can be located using the pathToResource argument. One needs a path inside 
 * the supplied resource, for example, if you have "en_US" resource bundle opened, you might ask for
 * "zoneStrings/3". Keys and indexes are supported. Keys
 * need to reference data in named structures, while indexes can reference both 
 * named and anonymous resources.
 * Features a fill-in parameter. 
 *
 * @param resourceBundle    a resource
 * @param pathToResource    a path that will lead to the requested resource
 * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
 *                          Alternatively, you can supply a struct to be filled by this function.
 * @param status            fills in the outgoing error code.
 * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
 */
U_CAPI UResourceBundle* U_EXPORT2
ures_findSubResource(const UResourceBundle *resB, 
                     char* pathToResource, 
                     UResourceBundle *fillIn, UErrorCode *status);

/**
 * Returns a functionally equivalent locale (considering keywords) for the specified keyword.
 * @param result fillin for the equivalent locale
 * @param resultCapacity capacity of the fillin buffer
 * @param path path to the tree, or NULL for ICU data
 * @param resName top level resource. Example: "collations"
 * @param keyword locale keyword. Example: "collation"
 * @param locid The requested locale
 * @param isAvailable If non-null, pointer to fillin parameter that indicates whether the 
 * requested locale was available. The locale is defined as 'available' if it physically 
 * exists within the specified tree.
 * @param omitDefault if TRUE, omit keyword and value if default. 'de_DE\@collation=standard' -> 'de_DE'
 * @param status error code
 * @return  the actual buffer size needed for the full locale.  If it's greater 
 * than resultCapacity, the returned full name will be truncated and an error code will be returned.
 */
U_CAPI int32_t U_EXPORT2
ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, 
                             const char *path, const char *resName, const char *keyword, const char *locid,
                             UBool *isAvailable, UBool omitDefault, UErrorCode *status);

/**
 * Given a tree path and keyword, return a string enumeration of all possible values for that keyword.
 * @param path path to the tree, or NULL for ICU data
 * @param keyword a particular keyword to consider, must match a top level resource name 
 * within the tree.
 * @param status error code
 */
U_CAPI UEnumeration* U_EXPORT2
ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status);


/**
 * Get a resource with multi-level fallback. Normally only the top level resources will
 * fallback to its parent. This performs fallback on subresources. For example, when a table
 * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
 * on the sub-resources because the table is defined in the current resource bundle, but this
 * function can perform fallback on the sub-resources of the table.
 * @param resB              a resource
 * @param inKey             a key associated with the requested resource
 * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be deleted by the caller.
 *                          Alternatively, you can supply a struct to be filled by this function.
 * @param status: fills in the outgoing error code
 *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
 *                could be a non-failing error 
 *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
 * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
 */
U_CAPI UResourceBundle* U_EXPORT2 
ures_getByKeyWithFallback(const UResourceBundle *resB, 
                          const char* inKey, 
                          UResourceBundle *fillIn, 
                          UErrorCode *status);


/**
 * Get a String with multi-level fallback. Normally only the top level resources will
 * fallback to its parent. This performs fallback on subresources. For example, when a table
 * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs
 * on the sub-resources because the table is defined in the current resource bundle, but this
 * function can perform fallback on the sub-resources of the table.
 * @param resB              a resource
 * @param inKey             a key associated with the requested resource
 * @param status: fills in the outgoing error code
 *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
 *                could be a non-failing error 
 *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
 * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
 */
U_CAPI const UChar* U_EXPORT2 
ures_getStringByKeyWithFallback(const UResourceBundle *resB, 
                          const char* inKey,  
                          int32_t* len,
                          UErrorCode *status);

#ifdef __cplusplus

U_CAPI void U_EXPORT2
ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
                          UResourceBundle *tempFillIn,
                          icu::ResourceDataValue &value, UErrorCode &errorCode);

U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
                             icu::ResourceSink &sink, UErrorCode &errorCode);

#endif  /* __cplusplus */

/**
 * Get a version number by key
 * @param resB bundle containing version number
 * @param key the key for the version number
 * @param ver fillin for the version number
 * @param status error code
 */
U_CAPI void U_EXPORT2
ures_getVersionByKey(const UResourceBundle *resB,
                     const char *key,
                     UVersionInfo ver,
                     UErrorCode *status);


/**
 * Internal function.
 * Return the version number associated with this ResourceBundle as a string.
 *
 * @param resourceBundle The resource bundle for which the version is checked.
 * @return  A version number string as specified in the resource bundle or its parent.
 *          The caller does not own this string.
 * @see ures_getVersion
 */
U_CAPI const char* U_EXPORT2 
ures_getVersionNumberInternal(const UResourceBundle *resourceBundle);

/**
 * Return the name of the Locale associated with this ResourceBundle. This API allows
 * you to query for the real locale of the resource. For example, if you requested 
 * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. 
 * For subresources, the locale where this resource comes from will be returned.
 * If fallback has occured, getLocale will reflect this.
 *
 * This internal version avoids deprecated-warnings in ICU code.
 *
 * @param resourceBundle resource bundle in question
 * @param status just for catching illegal arguments
 * @return  A Locale name
 */
U_CAPI const char* U_EXPORT2 
ures_getLocaleInternal(const UResourceBundle* resourceBundle, 
               UErrorCode* status);

/**
 * Same as ures_openDirect() but uses the fill-in parameter instead of allocating a new bundle.
 * 
 * @param r The existing UResourceBundle to fill in. If NULL then status will be
 *          set to U_ILLEGAL_ARGUMENT_ERROR.
 * @param packageName   The packageName and locale together point to an ICU udata object,
 *                      as defined by <code> udata_open( packageName, "res", locale, err) </code>
 *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
 *                      a package registered with udata_setAppData(). Using a full file or directory
 *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
 * @param locale  specifies the locale for which we want to open the resource
 *                if NULL, the default locale will be used. If strlen(locale) == 0
 *                root locale will be used.
 * @param status The error code.
 * @see ures_openDirect
 * @internal
 */
U_CAPI void U_EXPORT2
ures_openDirectFillIn(UResourceBundle *r,
                      const char *packageName,
                      const char *locale,
                      UErrorCode *status);

 /* for ures_getVersionByKey */

U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
    UResourceBundle *icudatares = NULL;
    
    if (U_FAILURE(*status)) {
        return;
    }
    
    if (dataVersionFillin != NULL) {
        icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
        if (U_SUCCESS(*status)) {
            ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status);
        }
        ures_close(icudatares);
    }
}
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
*  FILE NAME : icuplug.c
*
*   Date         Name        Description
*   10/29/2009   sl          New.
******************************************************************************
*/

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
*  FILE NAME : icuplug.h
*
*   Date         Name        Description
*   10/29/2009   sl          New.
******************************************************************************
*/

/**
 * \file
 * \brief C API: ICU Plugin API 
 *
 * <h2>C API: ICU Plugin API</h2>
 *
 * <p>C API allowing run-time loadable modules that extend or modify ICU functionality.</p>
 *
 * <h3>Loading and Configuration</h3>
 *
 * <p>At ICU startup time, the environment variable "ICU_PLUGINS" will be 
 * queried for a directory name.  If it is not set, the preprocessor symbol 
 * "DEFAULT_ICU_PLUGINS" will be checked for a default value.</p>
 *
 * <p>Within the above-named directory, the file  "icuplugins##.txt" will be 
 * opened, if present, where ## is the major+minor number of the currently 
 * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)</p>
 *
 * <p>The configuration file has this format:</p>
 *
 * <ul>
 * <li>Hash (#) begins a comment line</li>
 * 
 * <li>Non-comment lines have two or three components:
 * LIBRARYNAME     ENTRYPOINT     [ CONFIGURATION .. ]</li>
 *
 * <li>Tabs or spaces separate the three items.</li>
 *
 * <li>LIBRARYNAME is the name of a shared library, either a short name if 
 * it is on the loader path,  or a full pathname.</li>
 *
 * <li>ENTRYPOINT is the short (undecorated) symbol name of the plugin's 
 * entrypoint, as above.</li>
 *
 * <li>CONFIGURATION is the entire rest of the line . It's passed as-is to 
 * the plugin.</li>
 * </ul>
 *
 * <p>An example configuration file is, in its entirety:</p>
 *
 * \code
 * # this is icuplugins44.txt
 * testplug.dll    myPlugin        hello=world
 * \endcode
 * <p>Plugins are categorized as "high" or "low" level.  Low level are those 
 * which must be run BEFORE high level plugins, and before any operations 
 * which cause ICU to be 'initialized'.  If a plugin is low level but 
 * causes ICU to allocate memory or become initialized, that plugin is said 
 * to cause a 'level change'. </p>
 *
 * <p>At load time, ICU first queries all plugins to determine their level, 
 * then loads all 'low' plugins first, and then loads all 'high' plugins.  
 * Plugins are otherwise loaded in the order listed in the configuration file.</p>
 * 
 * <h3>Implementing a Plugin</h3>
 * \code
 * U_CAPI UPlugTokenReturn U_EXPORT2 
 * myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) {
 *   if(reason==UPLUG_REASON_QUERY) {
 *      uplug_setPlugName(plug, "Simple Plugin");
 *      uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH);
 *    } else if(reason==UPLUG_REASON_LOAD) {
 *       ... Set up some ICU things here.... 
 *    } else if(reason==UPLUG_REASON_UNLOAD) {
 *       ... unload, clean up ...
 *    }
 *   return UPLUG_TOKEN;
 *  }
 * \endcode
 *
 * <p>The UPlugData*  is an opaque pointer to the plugin-specific data, and is 
 * used in all other API calls.</p>
 *
 * <p>The API contract is:</p>
 * <ol><li>The plugin MUST always return UPLUG_TOKEN as a return value- to 
 * indicate that it is a valid plugin.</li>
 *
 * <li>When the 'reason' parameter is set to UPLUG_REASON_QUERY,  the 
 * plugin MUST call uplug_setPlugLevel() to indicate whether it is a high 
 * level or low level plugin.</li>
 *
 * <li>When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin 
 * SHOULD call uplug_setPlugName to indicate a human readable plugin name.</li></ol>
 * 
 *
 * \internal ICU 4.4 Technology Preview
 */






#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)



/* === Basic types === */

#ifndef U_HIDE_INTERNAL_API
/**
 * @{
 * Opaque structure passed to/from a plugin. 
 * use the APIs to access it.
 * @internal ICU 4.4 Technology Preview
 */

struct UPlugData;
typedef struct UPlugData UPlugData;

/** @} */

/**
 * Random Token to identify a valid ICU plugin. Plugins must return this 
 * from the entrypoint.
 * @internal ICU 4.4 Technology Preview
 */
#define UPLUG_TOKEN 0x54762486

/**
 * Max width of names, symbols, and configuration strings
 * @internal ICU 4.4 Technology Preview
 */
#define UPLUG_NAME_MAX              100


/**
 * Return value from a plugin entrypoint. 
 * Must always be set to UPLUG_TOKEN
 * @see UPLUG_TOKEN
 * @internal ICU 4.4 Technology Preview
 */
typedef uint32_t UPlugTokenReturn;

/**
 * Reason code for the entrypoint's call
 * @internal ICU 4.4 Technology Preview
 */
typedef enum {
    UPLUG_REASON_QUERY = 0,     /**< The plugin is being queried for info. **/
    UPLUG_REASON_LOAD = 1,     /**< The plugin is being loaded. **/
    UPLUG_REASON_UNLOAD = 2,   /**< The plugin is being unloaded. **/
    /**
     * Number of known reasons.
     * @internal The numeric value may change over time, see ICU ticket #12420.
     */
    UPLUG_REASON_COUNT
} UPlugReason;


/**
 * Level of plugin loading
 *     INITIAL:  UNKNOWN
 *       QUERY:   INVALID ->  { LOW | HIGH }
 *     ERR -> INVALID
 * @internal ICU 4.4 Technology Preview
 */
typedef enum {
    UPLUG_LEVEL_INVALID = 0,     /**< The plugin is invalid, hasn't called uplug_setLevel, or can't load. **/
    UPLUG_LEVEL_UNKNOWN = 1,     /**< The plugin is waiting to be installed. **/
    UPLUG_LEVEL_LOW     = 2,     /**< The plugin must be called before u_init completes **/
    UPLUG_LEVEL_HIGH    = 3,     /**< The plugin can run at any time. **/
    /**
     * Number of known levels.
     * @internal The numeric value may change over time, see ICU ticket #12420.
     */
    UPLUG_LEVEL_COUNT
} UPlugLevel;

/**
 * Entrypoint for an ICU plugin.
 * @param plug the UPlugData handle. 
 * @param status the plugin's extended status code.
 * @return A valid plugin must return UPLUG_TOKEN
 * @internal ICU 4.4 Technology Preview
 */
typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) (
                  UPlugData *plug,
                  UPlugReason reason,
                  UErrorCode *status);

/* === Needed for Implementing === */

/**
 * Request that this plugin not be unloaded at cleanup time.
 * This is appropriate for plugins which cannot be cleaned up.
 * @see u_cleanup()
 * @param plug plugin
 * @param dontUnload  set true if this plugin can't be unloaded
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void U_EXPORT2 
uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload);

/**
 * Set the level of this plugin.
 * @param plug plugin data handle
 * @param level the level of this plugin
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void U_EXPORT2
uplug_setPlugLevel(UPlugData *plug, UPlugLevel level);

/**
 * Get the level of this plugin.
 * @param plug plugin data handle
 * @return the level of this plugin
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UPlugLevel U_EXPORT2
uplug_getPlugLevel(UPlugData *plug);

/**
 * Get the lowest level of plug which can currently load.
 * For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load
 * if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load.
 * @return the lowest level of plug which can currently load
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UPlugLevel U_EXPORT2
uplug_getCurrentLevel(void);


/**
 * Get plug load status
 * @return The error code of this plugin's load attempt.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UErrorCode U_EXPORT2
uplug_getPlugLoadStatus(UPlugData *plug); 

/**
 * Set the human-readable name of this plugin.
 * @param plug plugin data handle
 * @param name the name of this plugin. The first UPLUG_NAME_MAX characters willi be copied into a new buffer.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void U_EXPORT2
uplug_setPlugName(UPlugData *plug, const char *name);

/**
 * Get the human-readable name of this plugin.
 * @param plug plugin data handle
 * @return the name of this plugin
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL const char * U_EXPORT2
uplug_getPlugName(UPlugData *plug);

/**
 * Return the symbol name for this plugin, if known.
 * @param plug plugin data handle
 * @return the symbol name, or NULL
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL const char * U_EXPORT2
uplug_getSymbolName(UPlugData *plug);

/**
 * Return the library name for this plugin, if known.
 * @param plug plugin data handle
 * @param status error code
 * @return the library name, or NULL
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL const char * U_EXPORT2
uplug_getLibraryName(UPlugData *plug, UErrorCode *status);

/**
 * Return the library used for this plugin, if known.
 * Plugins could use this to load data out of their 
 * @param plug plugin data handle
 * @return the library, or NULL
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void * U_EXPORT2
uplug_getLibrary(UPlugData *plug);

/**
 * Return the plugin-specific context data.
 * @param plug plugin data handle
 * @return the context, or NULL if not set
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void * U_EXPORT2
uplug_getContext(UPlugData *plug);

/**
 * Set the plugin-specific context data.
 * @param plug plugin data handle
 * @param context new context to set
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void U_EXPORT2
uplug_setContext(UPlugData *plug, void *context);


/**
 * Get the configuration string, if available.
 * The string is in the platform default codepage.
 * @param plug plugin data handle
 * @return configuration string, or else null.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL const char * U_EXPORT2
uplug_getConfiguration(UPlugData *plug);

/**
 * Return all currently installed plugins, from newest to oldest
 * Usage Example:
 * \code
 *    UPlugData *plug = NULL;
 *    while(plug=uplug_nextPlug(plug)) {
 *        ... do something with 'plug' ...
 *    }
 * \endcode
 * Not thread safe- do not call while plugs are added or removed.
 * @param prior pass in 'NULL' to get the first (most recent) plug, 
 *  otherwise pass the value returned on a prior call to uplug_nextPlug
 * @return the next oldest plugin, or NULL if no more.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UPlugData* U_EXPORT2
uplug_nextPlug(UPlugData *prior);

/**
 * Inject a plugin as if it were loaded from a library.
 * This is useful for testing plugins. 
 * Note that it will have a 'NULL' library pointer associated
 * with it, and therefore no llibrary will be closed at cleanup time.
 * Low level plugins may not be able to load, as ordering can't be enforced.
 * @param entrypoint entrypoint to install
 * @param config user specified configuration string, if available, or NULL.
 * @param status error result
 * @return the new UPlugData associated with this plugin, or NULL if error.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UPlugData* U_EXPORT2
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status);


/**
 * Inject a plugin from a library, as if the information came from a config file.
 * Low level plugins may not be able to load, and ordering can't be enforced.
 * @param libName DLL name to load
 * @param sym symbol of plugin (UPlugEntrypoint function)
 * @param config configuration string, or NULL
 * @param status error result
 * @return the new UPlugData associated with this plugin, or NULL if error.
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL UPlugData* U_EXPORT2
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status);

/**
 * Remove a plugin. 
 * Will request the plugin to be unloaded, and close the library if needed
 * @param plug plugin handle to close
 * @param status error result
 * @internal ICU 4.4 Technology Preview
 */
U_INTERNAL void U_EXPORT2
uplug_removePlug(UPlugData *plug, UErrorCode *status);
#endif  /* U_HIDE_INTERNAL_API */

#endif /* UCONFIG_ENABLE_PLUGINS */




#if UCONFIG_ENABLE_PLUGINS


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
*  FILE NAME : icuplugimp.h
* 
*  Internal functions for the ICU plugin system
*
*   Date         Name        Description
*   10/29/2009   sl          New.
******************************************************************************
*/





#if UCONFIG_ENABLE_PLUGINS

/*========================*/
/** @{ Library Manipulation  
 */

/**
 * Open a library, adding a reference count if needed.
 * @param libName library name to load
 * @param status error code
 * @return the library pointer, or NULL
 * @internal internal use only
 */
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status);

/**
 * Close a library, if its reference count is 0
 * @param lib the library to close
 * @param status error code
 * @internal internal use only
 */
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status);

/**
 * Get a library's name, or NULL if not found.
 * @param lib the library's name
 * @param status error code
 * @return the library name, or NULL if not found.
 * @internal internal use only
 */
U_INTERNAL  char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status);

/** @} */

/*========================*/
/** {@ ICU Plugin internal interfaces
 */

/**
 * Initialize the plugins 
 * @param status error result
 * @internal - Internal use only.
 */
U_INTERNAL void U_EXPORT2
uplug_init(UErrorCode *status);

/**
 * Get raw plug N
 * @internal - Internal use only
 */ 
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n);

/**
 * Get the name of the plugin file. 
 * @internal - Internal use only.
 */
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile(void);

/** @} */

#endif






#include <stdio.h>
#ifdef __MVS__  /* defined by z/OS compiler */
#define _POSIX_SOURCE
#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
#endif


using namespace icu;

#ifndef UPLUG_TRACE
#define UPLUG_TRACE 0
#endif

#if UPLUG_TRACE
#include <stdio.h>
#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
#endif

/**
 * Internal structure of an ICU plugin. 
 */

struct UPlugData {
  UPlugEntrypoint  *entrypoint; /**< plugin entrypoint */
  uint32_t structSize;    /**< initialized to the size of this structure */
  uint32_t token;         /**< must be U_PLUG_TOKEN */
  void *lib;              /**< plugin library, or NULL */
  char libName[UPLUG_NAME_MAX];   /**< library name */
  char sym[UPLUG_NAME_MAX];        /**< plugin symbol, or NULL */
  char config[UPLUG_NAME_MAX];     /**< configuration data */
  void *context;          /**< user context data */
  char name[UPLUG_NAME_MAX];   /**< name of plugin */
  UPlugLevel  level; /**< level of plugin */
  UBool   awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
  UBool   dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
  UErrorCode pluginStatus; /**< status code of plugin */
};



#define UPLUG_LIBRARY_INITIAL_COUNT 8
#define UPLUG_PLUGIN_INITIAL_COUNT 12

/**
 * Remove an item
 * @param list the full list
 * @param listSize the number of entries in the list
 * @param memberSize the size of one member
 * @param itemToRemove the item number of the member
 * @return the new listsize 
 */
static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
  uint8_t *bytePtr = (uint8_t *)list;
    
  /* get rid of some bad cases first */
  if(listSize<1) {
    return listSize;
  }
    
  /* is there anything to move? */
  if(listSize > itemToRemove+1) {
    memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), memberSize);
  }
    
  return listSize-1;
}




#if U_ENABLE_DYLOAD
/**
 * Library management. Internal. 
 * @internal
 */
struct UPlugLibrary;

/**
 * Library management. Internal. 
 * @internal
 */
typedef struct UPlugLibrary {
  void *lib;                           /**< library ptr */
  char name[UPLUG_NAME_MAX]; /**< library name */
  uint32_t ref;                        /**< reference count */
} UPlugLibrary;

static UPlugLibrary   staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
static UPlugLibrary * libraryList = staticLibraryList;
static int32_t libraryCount = 0;
static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;

/**
 * Search for a library. Doesn't lock
 * @param libName libname to search for
 * @return the library's struct
 */
static int32_t searchForLibraryName(const char *libName) {
  int32_t i;
    
  for(i=0;i<libraryCount;i++) {
    if(!uprv_strcmp(libName, libraryList[i].name)) {
      return i;
    }
  }
  return -1;
}

static int32_t searchForLibrary(void *lib) {
  int32_t i;
    
  for(i=0;i<libraryCount;i++) {
    if(lib==libraryList[i].lib) {
      return i;
    }
  }
  return -1;
}

U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status) {
  int32_t libEnt;
  char *ret = NULL;
  if(U_FAILURE(*status)) {
    return NULL;
  }
  libEnt = searchForLibrary(lib);
  if(libEnt!=-1) { 
    ret = libraryList[libEnt].name;
  } else {
    *status = U_MISSING_RESOURCE_ERROR;
  }
  return ret;
}

U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status) {
  int32_t libEntry = -1;
  void *lib = NULL;
    
  if(U_FAILURE(*status)) return NULL;

  libEntry = searchForLibraryName(libName);
  if(libEntry == -1) {
    libEntry = libraryCount++;
    if(libraryCount >= libraryMax) {
      /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
      *status = U_MEMORY_ALLOCATION_ERROR;
#if UPLUG_TRACE
      DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
#endif
      return NULL;
    }
    /* Some operating systems don't want 
       DL operations from multiple threads. */
    libraryList[libEntry].lib = uprv_dl_open(libName, status);
#if UPLUG_TRACE
    DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif
        
    if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
      /* cleanup. */
      libraryList[libEntry].lib = NULL; /* failure with open */
      libraryList[libEntry].name[0] = 0;
#if UPLUG_TRACE
      DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
#endif
      /* no need to free - just won't increase the count. */
      libraryCount--;
    } else { /* is it still there? */
      /* link it in */
      uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX);
      libraryList[libEntry].ref=1;
      lib = libraryList[libEntry].lib;
    }

  } else {
    lib = libraryList[libEntry].lib;
    libraryList[libEntry].ref++;
  }
  return lib;
}

U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status) {
  int32_t i;
    
#if UPLUG_TRACE
  DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
#endif
  if(U_FAILURE(*status)) return;
    
  for(i=0;i<libraryCount;i++) {
    if(lib==libraryList[i].lib) {
      if(--(libraryList[i].ref) == 0) {
        uprv_dl_close(libraryList[i].lib, status);
        libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), i);
      }
      return;
    }
  }
  *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
}

#endif

static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
static int32_t pluginCount = 0;



  
static int32_t uplug_pluginNumber(UPlugData* d) {
  UPlugData *pastPlug = &pluginList[pluginCount];
  if(d<=pluginList) {
    return 0;
  } else if(d>=pastPlug) {
    return pluginCount;
  } else {
    return (d-pluginList)/sizeof(pluginList[0]);
  }
}


U_CAPI UPlugData * U_EXPORT2
uplug_nextPlug(UPlugData *prior) {
  if(prior==NULL) {
    return pluginList;
  } else {
    UPlugData *nextPlug = &prior[1];
    UPlugData *pastPlug = &pluginList[pluginCount];
    
    if(nextPlug>=pastPlug) {
      return NULL;
    } else {
      return nextPlug;
    }
  }
}



/**
 * Call the plugin with some params
 */
static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
  UPlugTokenReturn token;
  if(plug==NULL||U_FAILURE(*status)) {
    return;
  }
  token = (*(plug->entrypoint))(plug, reason, status);
  if(token!=UPLUG_TOKEN) {
    *status = U_INTERNAL_PROGRAM_ERROR;
  }
}


static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
  if(plug->awaitingLoad) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return; 
  }
  if(U_SUCCESS(plug->pluginStatus)) {
    /* Don't unload a plug which has a failing load status - means it didn't actually load. */
    uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
  }
}

static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
  if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return; 
  }
  plug->level = UPLUG_LEVEL_INVALID;
  uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
  if(U_SUCCESS(*status)) { 
    if(plug->level == UPLUG_LEVEL_INVALID) {
      plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
      plug->awaitingLoad = FALSE;
    }
  } else {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
    plug->awaitingLoad = FALSE;
  }
}


static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return;
  }
  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
  plug->awaitingLoad = FALSE;
  if(!U_SUCCESS(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
  }
}

static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
{
  UPlugData *plug = NULL;

  if(U_FAILURE(*status)) {
    return NULL;
  }

  if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
    *status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
  }

  plug = &pluginList[pluginCount++];

  plug->token = UPLUG_TOKEN;
  plug->structSize = sizeof(UPlugData);
  plug->name[0]=0;
  plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
  plug->awaitingLoad = TRUE;
  plug->dontUnload = FALSE;
  plug->pluginStatus = U_ZERO_ERROR;
  plug->libName[0] = 0;
  plug->config[0]=0;
  plug->sym[0]=0;
  plug->lib=NULL;
  plug->entrypoint=NULL;


  return plug;
}

static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
                                     UErrorCode *status) {
  UPlugData *plug = uplug_allocateEmptyPlug(status);
  if(U_FAILURE(*status)) {
    return NULL;
  }

  if(config!=NULL) {
    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
  } else {
    plug->config[0] = 0;
  }
    
  if(symName!=NULL) {
    uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
  } else {
    plug->sym[0] = 0;
  }
    
  plug->entrypoint = entrypoint;
  plug->lib = lib;
  uplug_queryPlug(plug, status);
    
  return plug;
}

static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
  UErrorCode subStatus = U_ZERO_ERROR;
  if(!plug->dontUnload) {
#if U_ENABLE_DYLOAD
    uplug_closeLibrary(plug->lib, &subStatus);
#endif
  }
  plug->lib = NULL;
  if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
    *status = subStatus;
  }
  /* shift plugins up and decrement count. */
  if(U_SUCCESS(*status)) {
    /* all ok- remove. */
    pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
  } else {
    /* not ok- leave as a message. */
    plug->awaitingLoad=FALSE;
    plug->entrypoint=0;
    plug->dontUnload=TRUE;
  }
}

static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
  if(plugToRemove != NULL) {
    uplug_unloadPlug(plugToRemove, status);
    uplug_deallocatePlug(plugToRemove, status);
  }
}

U_CAPI void U_EXPORT2
uplug_removePlug(UPlugData *plug, UErrorCode *status)  {
  UPlugData *cursor = NULL;
  UPlugData *plugToRemove = NULL;
  if(U_FAILURE(*status)) return;
    
  for(cursor=pluginList;cursor!=NULL;) {
    if(cursor==plug) {
      plugToRemove = plug;
      cursor=NULL;
    } else {
      cursor = uplug_nextPlug(cursor);
    }
  }
    
  uplug_doUnloadPlug(plugToRemove, status);
}




U_CAPI void U_EXPORT2 
uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
{
  data->dontUnload = dontUnload;
}


U_CAPI void U_EXPORT2
uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
  data->level = level;
}


U_CAPI UPlugLevel U_EXPORT2
uplug_getPlugLevel(UPlugData *data) {
  return data->level;
}


U_CAPI void U_EXPORT2
uplug_setPlugName(UPlugData *data, const char *name) {
  uprv_strncpy(data->name, name, UPLUG_NAME_MAX);
}


U_CAPI const char * U_EXPORT2
uplug_getPlugName(UPlugData *data) {
  return data->name;
}


U_CAPI const char * U_EXPORT2
uplug_getSymbolName(UPlugData *data) {
  return data->sym;
}

U_CAPI const char * U_EXPORT2
uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
  if(data->libName[0]) {
    return data->libName;
  } else {
#if U_ENABLE_DYLOAD
    return uplug_findLibrary(data->lib, status);
#else
    return NULL;
#endif
  }
}

U_CAPI void * U_EXPORT2
uplug_getLibrary(UPlugData *data) {
  return data->lib;
}

U_CAPI void * U_EXPORT2
uplug_getContext(UPlugData *data) {
  return data->context;
}


U_CAPI void U_EXPORT2
uplug_setContext(UPlugData *data, void *context) {
  data->context = context;
}

U_CAPI const char* U_EXPORT2
uplug_getConfiguration(UPlugData *data) {
  return data->config;
}

U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n) { 
  if(n <0 || n >= pluginCount) {
    return NULL;
  } else { 
    return &(pluginList[n]);
  }
}


U_CAPI UErrorCode U_EXPORT2
uplug_getPlugLoadStatus(UPlugData *plug) {
  return plug->pluginStatus;
}




/**
 * Initialize a plugin fron an entrypoint and library - but don't load it.
 */
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
                                                         UErrorCode *status) {
  UPlugData *plug = NULL;

  plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);

  if(U_SUCCESS(*status)) {
    return plug;
  } else {
    uplug_deallocatePlug(plug, status);
    return NULL;
  }
}

U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
  UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
  uplug_loadPlug(plug, status);
  return plug;
}

#if U_ENABLE_DYLOAD

static UPlugData* 
uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
{
  UPlugData *plug = uplug_allocateEmptyPlug(status);
  if(U_FAILURE(*status)) return NULL;

  plug->pluginStatus = loadStatus;
  plug->awaitingLoad = FALSE; /* Won't load. */
  plug->dontUnload = TRUE; /* cannot unload. */

  if(sym!=NULL) {
    uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
  }

  if(libName!=NULL) {
    uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX);
  }

  if(nameOrError!=NULL) {
    uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX);
  }

  if(config!=NULL) {
    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
  }

  return plug;
}

/**
 * Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
 */
static UPlugData* 
uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
  void *lib = NULL;
  UPlugData *plug = NULL;
  if(U_FAILURE(*status)) { return NULL; }
  lib = uplug_openLibrary(libName, status);
  if(lib!=NULL && U_SUCCESS(*status)) {
    UPlugEntrypoint *entrypoint = NULL;
    entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);

    if(entrypoint!=NULL&&U_SUCCESS(*status)) {
      plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
      if(plug!=NULL&&U_SUCCESS(*status)) {
        plug->lib = lib; /* plug takes ownership of library */
        lib = NULL; /* library is now owned by plugin. */
      }
    } else {
      UErrorCode subStatus = U_ZERO_ERROR;
      plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
    }
    if(lib!=NULL) { /* still need to close the lib */
      UErrorCode subStatus = U_ZERO_ERROR;
      uplug_closeLibrary(lib, &subStatus); /* don't care here */
    }
  } else {
    UErrorCode subStatus = U_ZERO_ERROR;
    plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
  }
  return plug;
}

U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { 
  UPlugData *plug = NULL;
  if(U_FAILURE(*status)) { return NULL; }
  plug = uplug_initPlugFromLibrary(libName, sym, config, status);
  uplug_loadPlug(plug, status);

  return plug;
}

#endif

static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;

U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
  return gCurrentLevel;
}

static UBool U_CALLCONV uplug_cleanup(void)
{
  int32_t i;
    
  UPlugData *pluginToRemove;
  /* cleanup plugs */
  for(i=0;i<pluginCount;i++) {
    UErrorCode subStatus = U_ZERO_ERROR;
    pluginToRemove = &pluginList[i];
    /* unload and deallocate */
    uplug_doUnloadPlug(pluginToRemove, &subStatus);
  }
  /* close other held libs? */
  gCurrentLevel = UPLUG_LEVEL_LOW;
  return TRUE;
}

#if U_ENABLE_DYLOAD

static void uplug_loadWaitingPlugs(UErrorCode *status) {
  int32_t i;
  UPlugLevel currentLevel = uplug_getCurrentLevel();
    
  if(U_FAILURE(*status)) {
    return;
  }
#if UPLUG_TRACE
  DBG((stderr,  "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
#endif
  /* pass #1: low level plugs */
  for(i=0;i<pluginCount;i++) {
    UErrorCode subStatus = U_ZERO_ERROR;
    UPlugData *pluginToLoad = &pluginList[i];
    if(pluginToLoad->awaitingLoad) {
      if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
        if(currentLevel > UPLUG_LEVEL_LOW) {
          pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
        } else {
          UPlugLevel newLevel;
          uplug_loadPlug(pluginToLoad, &subStatus);
          newLevel = uplug_getCurrentLevel();
          if(newLevel > currentLevel) {
            pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
            currentLevel = newLevel;
          }
        }
        pluginToLoad->awaitingLoad = FALSE;
      } 
    }
  }    
  for(i=0;i<pluginCount;i++) {
    UErrorCode subStatus = U_ZERO_ERROR;
    UPlugData *pluginToLoad = &pluginList[i];
        
    if(pluginToLoad->awaitingLoad) {
      if(pluginToLoad->level == UPLUG_LEVEL_INVALID) { 
        pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
      } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
        pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
      } else {
        uplug_loadPlug(pluginToLoad, &subStatus);
      }
      pluginToLoad->awaitingLoad = FALSE;
    }
  }
    
#if UPLUG_TRACE
  DBG((stderr,  " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
#endif
}

/* Name of the plugin config file */
static char plugin_file[2048] = "";
#endif

U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile() {
#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
  return plugin_file;
#else
  return NULL;
#endif
}


//  uplug_init()  is called first thing from u_init().

U_CAPI void U_EXPORT2
uplug_init(UErrorCode *status) {
#if !U_ENABLE_DYLOAD
  (void)status; /* unused */
#elif !UCONFIG_NO_FILE_IO
  CharString plugin_dir;
  const char *env = getenv("ICU_PLUGINS");

  if(U_FAILURE(*status)) return;
  if(env != NULL) {
    plugin_dir.append(env, -1, *status);
  }
  if(U_FAILURE(*status)) return;

#if defined(DEFAULT_ICU_PLUGINS) 
  if(plugin_dir.isEmpty()) {
    plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
  }
#endif

#if UPLUG_TRACE
  DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
#endif

  if(!plugin_dir.isEmpty()) {
    FILE *f;
        
    CharString pluginFile;
#ifdef OS390BATCH
/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS  */
/* Keeping in mind that unauthorized file access is logged, monitored, and enforced  */
/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX    */
/* System Services.  Alternative techniques might be allocating a member in          */
/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?).  The       */
/* DDNAME can be connected to a file in the HFS if need be.                          */

    pluginFile.append("//DD:ICUPLUG", -1, *status);        /* JAM 20 Oct 2011 */
#else
    pluginFile.append(plugin_dir, *status);
    pluginFile.append(U_FILE_SEP_STRING, -1, *status);
    pluginFile.append("icuplugins", -1, *status);
    pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
    pluginFile.append(".txt", -1, *status);
#endif

#if UPLUG_TRACE
    DBG((stderr, "status=%s\n", u_errorName(*status)));
#endif

    if(U_FAILURE(*status)) {
      return;
    }
    if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
      *status = U_BUFFER_OVERFLOW_ERROR;
#if UPLUG_TRACE
      DBG((stderr, "status=%s\n", u_errorName(*status)));
#endif
      return;
    }
    
    /* plugin_file is not used for processing - it is only used 
       so that uplug_getPluginFile() works (i.e. icuinfo)
    */
    uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
        
#if UPLUG_TRACE
    DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
#endif
        
#ifdef __MVS__
    if (iscics()) /* 12 Nov 2011 JAM */
    {
        f = NULL;
    }
    else
#endif
    {
        f = fopen(pluginFile.data(), "r");
    }

    if(f != NULL) {
      char linebuf[1024];
      char *p, *libName=NULL, *symName=NULL, *config=NULL;
      int32_t line = 0;
            
            
      while(fgets(linebuf,1023,f)) {
        line++;

        if(!*linebuf || *linebuf=='#') {
          continue;
        } else {
          p = linebuf;
          while(*p&&isspace((int)*p))
            p++;
          if(!*p || *p=='#') continue;
          libName = p;
          while(*p&&!isspace((int)*p)) {
            p++;
          }
          if(!*p || *p=='#') continue; /* no tab after libname */
          *p=0; /* end of libname */
          p++;
          while(*p&&isspace((int)*p)) {
            p++;
          }
          if(!*p||*p=='#') continue; /* no symname after libname +tab */
          symName = p;
          while(*p&&!isspace((int)*p)) {
            p++;
          }
                    
          if(*p) { /* has config */
            *p=0;
            ++p;
            while(*p&&isspace((int)*p)) {
              p++;
            }
            if(*p) {
              config = p;
            }
          }
                    
          /* chop whitespace at the end of the config */
          if(config!=NULL&&*config!=0) {
            p = config+strlen(config);
            while(p>config&&isspace((int)*(--p))) {
              *p=0;
            }
          }
                
          /* OK, we're good. */
          { 
            UErrorCode subStatus = U_ZERO_ERROR;
            UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
            if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
              *status = subStatus;
            }
#if UPLUG_TRACE
            DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
            DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
#else
            (void)plug; /* unused */
#endif
          }
        }
      }
      fclose(f);
    } else {
#if UPLUG_TRACE
      DBG((stderr, "Can't open plugin file %s\n", plugin_file));
#endif
    }
  }
  uplug_loadWaitingPlugs(status);
#endif /* U_ENABLE_DYLOAD */
  gCurrentLevel = UPLUG_LEVEL_HIGH;
  ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
}

#endif


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.cpp
*
* created on: 2014sep03
* created by: Markus W. Scherer
*/



#if !UCONFIG_NO_NORMALIZATION









// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/




#if !UCONFIG_NO_NORMALIZATION








U_NAMESPACE_BEGIN

// Intermediate class:
// Has Normalizer2Impl and does boilerplate argument checking and setup.
class Normalizer2WithImpl : public Normalizer2 {
public:
    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
    virtual ~Normalizer2WithImpl();

    // normalize
    virtual UnicodeString &
    normalize(const UnicodeString &src,
              UnicodeString &dest,
              UErrorCode &errorCode) const {
        if(U_FAILURE(errorCode)) {
            dest.setToBogus();
            return dest;
        }
        const UChar *sArray=src.getBuffer();
        if(&dest==&src || sArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            dest.setToBogus();
            return dest;
        }
        dest.remove();
        ReorderingBuffer buffer(impl, dest);
        if(buffer.init(src.length(), errorCode)) {
            normalize(sArray, sArray+src.length(), buffer, errorCode);
        }
        return dest;
    }
    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;

    // normalize and append
    virtual UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const {
        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
    }
    virtual UnicodeString &
    append(UnicodeString &first,
           const UnicodeString &second,
           UErrorCode &errorCode) const {
        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
    }
    UnicodeString &
    normalizeSecondAndAppend(UnicodeString &first,
                             const UnicodeString &second,
                             UBool doNormalize,
                             UErrorCode &errorCode) const {
        uprv_checkCanGetBuffer(first, errorCode);
        if(U_FAILURE(errorCode)) {
            return first;
        }
        const UChar *secondArray=second.getBuffer();
        if(&first==&second || secondArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return first;
        }
        int32_t firstLength=first.length();
        UnicodeString safeMiddle;
        {
            ReorderingBuffer buffer(impl, first);
            if(buffer.init(firstLength+second.length(), errorCode)) {
                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
                                   safeMiddle, buffer, errorCode);
            }
        }  // The ReorderingBuffer destructor finalizes the first string.
        if(U_FAILURE(errorCode)) {
            // Restore the modified suffix of the first string.
            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
        }
        return first;
    }
    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
    virtual UBool
    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
        UChar buffer[4];
        int32_t length;
        const UChar *d=impl.getDecomposition(c, buffer, length);
        if(d==NULL) {
            return FALSE;
        }
        if(d==buffer) {
            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
        } else {
            decomposition.setTo(FALSE, d, length);  // read-only alias
        }
        return TRUE;
    }
    virtual UBool
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
        UChar buffer[30];
        int32_t length;
        const UChar *d=impl.getRawDecomposition(c, buffer, length);
        if(d==NULL) {
            return FALSE;
        }
        if(d==buffer) {
            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
        } else {
            decomposition.setTo(FALSE, d, length);  // read-only alias
        }
        return TRUE;
    }
    virtual UChar32
    composePair(UChar32 a, UChar32 b) const {
        return impl.composePair(a, b);
    }

    virtual uint8_t
    getCombiningClass(UChar32 c) const {
        return impl.getCC(impl.getNorm16(c));
    }

    // quick checks
    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
        const UChar *sArray=s.getBuffer();
        if(sArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return FALSE;
        }
        const UChar *sLimit=sArray+s.length();
        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
    }
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
    }
    virtual int32_t
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
        if(U_FAILURE(errorCode)) {
            return 0;
        }
        const UChar *sArray=s.getBuffer();
        if(sArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
    }
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;

    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
        return UNORM_YES;
    }

    const Normalizer2Impl &impl;
};

class DecomposeNormalizer2 : public Normalizer2WithImpl {
public:
    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    virtual ~DecomposeNormalizer2();

private:
    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.decompose(src, limit, &buffer, errorCode);
    }
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    }
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
        return impl.decompose(src, limit, NULL, errorCode);
    }
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
    }
    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
};

class ComposeNormalizer2 : public Normalizer2WithImpl {
public:
    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
    virtual ~ComposeNormalizer2();

private:
    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
    }
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.

    void
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
        if (U_FAILURE(errorCode)) {
            return;
        }
        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
            edits->reset();
        }
        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
        impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
                         &sink, edits, errorCode);
        sink.Flush();
    }

    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
    }

    virtual UBool
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
        const UChar *sArray=s.getBuffer();
        if(sArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return FALSE;
        }
        UnicodeString temp;
        ReorderingBuffer buffer(impl, temp);
        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
            return FALSE;
        }
        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
    }
    virtual UBool
    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
        return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
    }
    virtual UNormalizationCheckResult
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
        if(U_FAILURE(errorCode)) {
            return UNORM_MAYBE;
        }
        const UChar *sArray=s.getBuffer();
        if(sArray==NULL) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return UNORM_MAYBE;
        }
        UNormalizationCheckResult qcResult=UNORM_YES;
        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
        return qcResult;
    }
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
    }
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
        return impl.getCompQuickCheck(impl.getNorm16(c));
    }
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
        return impl.hasCompBoundaryBefore(c);
    }
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
        return impl.hasCompBoundaryAfter(c, onlyContiguous);
    }
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
        return impl.isCompInert(c, onlyContiguous);
    }

    const UBool onlyContiguous;
};

class FCDNormalizer2 : public Normalizer2WithImpl {
public:
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
    virtual ~FCDNormalizer2();

private:
    virtual void
    normalize(const UChar *src, const UChar *limit,
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCD(src, limit, &buffer, errorCode);
    }
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
    virtual void
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
                       UnicodeString &safeMiddle,
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
    }
    virtual const UChar *
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
        return impl.makeFCD(src, limit, NULL, errorCode);
    }
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
};

struct Norm2AllModes : public UMemory {
    Norm2AllModes(Normalizer2Impl *i)
            : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
    ~Norm2AllModes();

    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
    static Norm2AllModes *createInstance(const char *packageName,
                                         const char *name,
                                         UErrorCode &errorCode);

    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);

    Normalizer2Impl *impl;
    ComposeNormalizer2 comp;
    DecomposeNormalizer2 decomp;
    FCDNormalizer2 fcd;
    ComposeNormalizer2 fcc;
};

U_NAMESPACE_END

#endif  // !UCONFIG_NO_NORMALIZATION






U_NAMESPACE_BEGIN

class LoadedNormalizer2Impl : public Normalizer2Impl {
public:
    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
    virtual ~LoadedNormalizer2Impl();

    void load(const char *packageName, const char *name, UErrorCode &errorCode);

private:
    static UBool U_CALLCONV
    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);

    UDataMemory *memory;
    UCPTrie *ownedTrie;
};

LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
    udata_close(memory);
    ucptrie_close(ownedTrie);
}

UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
                                    const char * /* type */, const char * /*name*/,
                                    const UDataInfo *pInfo) {
    if(
        pInfo->size>=20 &&
        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
        pInfo->charsetFamily==U_CHARSET_FAMILY &&
        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
        pInfo->dataFormat[1]==0x72 &&
        pInfo->dataFormat[2]==0x6d &&
        pInfo->dataFormat[3]==0x32 &&
        pInfo->formatVersion[0]==4
    ) {
        // Normalizer2Impl *me=(Normalizer2Impl *)context;
        // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
        return TRUE;
    } else {
        return FALSE;
    }
}

void
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
    const int32_t *inIndexes=(const int32_t *)inBytes;
    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
    if(indexesLength<=IX_MIN_LCCC_CP) {
        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
        return;
    }

    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
                                     inBytes+offset, nextOffset-offset, NULL,
                                     &errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }

    offset=nextOffset;
    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
    const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);

    // smallFCD: new in formatVersion 2
    offset=nextOffset;
    const uint8_t *inSmallFCD=inBytes+offset;

    init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
}

// instance cache ---------------------------------------------------------- ***

Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
    if(impl==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    impl->load(packageName, name, errorCode);
    return createInstance(impl, errorCode);
}

U_CDECL_BEGIN
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
U_CDECL_END

#if !NORM2_HARDCODE_NFC_DATA
static Norm2AllModes *nfcSingleton;
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
#endif

static Norm2AllModes *nfkcSingleton;
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;

static Norm2AllModes *nfkc_cfSingleton;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;

static UHashtable    *cache=NULL;

// UInitOnce singleton initialization function
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
#if !NORM2_HARDCODE_NFC_DATA
    if (uprv_strcmp(what, "nfc") == 0) {
        nfcSingleton    = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
    } else
#endif
    if (uprv_strcmp(what, "nfkc") == 0) {
        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
    } else {
        UPRV_UNREACHABLE;   // Unknown singleton
    }
    ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
}

U_CDECL_BEGIN

static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    delete (Norm2AllModes *)allModes;
}

static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
#if !NORM2_HARDCODE_NFC_DATA
    delete nfcSingleton;
    nfcSingleton = NULL;
    nfcInitOnce.reset();
#endif

    delete nfkcSingleton;
    nfkcSingleton = NULL;
    nfkcInitOnce.reset();

    delete nfkc_cfSingleton;
    nfkc_cfSingleton = NULL;
    nfkc_cfInitOnce.reset();

    uhash_close(cache);
    cache=NULL;
    return TRUE;
}

U_CDECL_END

#if !NORM2_HARDCODE_NFC_DATA
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NULL; }
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    return nfcSingleton;
}
#endif

const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NULL; }
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    return nfkcSingleton;
}

const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NULL; }
    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    return nfkc_cfSingleton;
}

#if !NORM2_HARDCODE_NFC_DATA
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    return allModes!=NULL ? &allModes->comp : NULL;
}

const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    return allModes!=NULL ? &allModes->decomp : NULL;
}

const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    return allModes!=NULL ? &allModes->fcd : NULL;
}

const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    return allModes!=NULL ? &allModes->fcc : NULL;
}

const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    return allModes!=NULL ? allModes->impl : NULL;
}
#endif

const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    return allModes!=NULL ? &allModes->comp : NULL;
}

const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    return allModes!=NULL ? &allModes->decomp : NULL;
}

const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    return allModes!=NULL ? &allModes->comp : NULL;
}

const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    const Norm2AllModes *allModes=NULL;
    if(packageName==NULL) {
        if(0==uprv_strcmp(name, "nfc")) {
            allModes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc")) {
            allModes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
        }
    }
    if(allModes==NULL && U_SUCCESS(errorCode)) {
        {
            Mutex lock;
            if(cache!=NULL) {
                allModes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(allModes==NULL) {
            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
            LocalPointer<Norm2AllModes> localAllModes(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *temp=uhash_get(cache, name);
                if(temp==NULL) {
                    int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
                    char *nameCopy=(char *)uprv_malloc(keyLength);
                    if(nameCopy==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    allModes=localAllModes.getAlias();
                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
                } else {
                    // race condition
                    allModes=(Norm2AllModes *)temp;
                }
            }
        }
    }
    if(allModes!=NULL && U_SUCCESS(errorCode)) {
        switch(mode) {
        case UNORM2_COMPOSE:
            return &allModes->comp;
        case UNORM2_DECOMPOSE:
            return &allModes->decomp;
        case UNORM2_FCD:
            return &allModes->fcd;
        case UNORM2_COMPOSE_CONTIGUOUS:
            return &allModes->fcc;
        default:
            break;  // do nothing
        }
    }
    return NULL;
}

const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    switch(mode) {
    case UNORM_NFD:
        return Normalizer2::getNFDInstance(errorCode);
    case UNORM_NFKD:
        return Normalizer2::getNFKDInstance(errorCode);
    case UNORM_NFC:
        return Normalizer2::getNFCInstance(errorCode);
    case UNORM_NFKC:
        return Normalizer2::getNFKCInstance(errorCode);
    case UNORM_FCD:
        return getFCDInstance(errorCode);
    default:  // UNORM_NONE
        return getNoopInstance(errorCode);
    }
}

const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    return allModes!=NULL ? allModes->impl : NULL;
}

const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    return allModes!=NULL ? allModes->impl : NULL;
}

U_NAMESPACE_END

// C API ------------------------------------------------------------------- ***

U_NAMESPACE_USE

U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
}

U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
}

U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
}

U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
}

U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
        return UNORM_YES;
    }
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_SUCCESS(errorCode)) {
        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    } else {
        return UNORM_MAYBE;
    }
}

#endif  // !UCONFIG_NO_NORMALIZATION
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <utility>

  // CharStringByteSink


// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2004-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/





/**
 * Create an iterator over the specified keywords list
 * @param keywordList double-null terminated list. Will be copied.
 * @param keywordListSize size in bytes of keywordList
 * @param status err code
 * @return enumeration (owned by caller) of the keyword list.
 * @internal ICU 3.0
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);

/**
 * Look up a resource bundle table item with fallback on the table level.
 * This is accessible so it can be called by C++ code.
 */
U_CAPI const UChar * U_EXPORT2
uloc_getTableStringWithFallback(
    const char *path,
    const char *locale,
    const char *tableKey,
    const char *subTableKey,
    const char *itemKey,
    int32_t *pLength,
    UErrorCode *pErrorCode);

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) (a == '_' || a == '-')

U_CFUNC const char* 
uloc_getCurrentCountryID(const char* oldID);

U_CFUNC const char* 
uloc_getCurrentLanguageID(const char* oldID);

U_CFUNC int32_t
ulocimp_getLanguage(const char *localeID,
                    char *language, int32_t languageCapacity,
                    const char **pEnd);

U_CFUNC int32_t
ulocimp_getScript(const char *localeID,
                   char *script, int32_t scriptCapacity,
                   const char **pEnd);

U_CFUNC int32_t
ulocimp_getCountry(const char *localeID,
                   char *country, int32_t countryCapacity,
                   const char **pEnd);

/**
 * Writes a well-formed language tag for this locale ID.
 *
 * **Note**: When `strict` is FALSE, any locale fields which do not satisfy the
 * BCP47 syntax requirement will be omitted from the result.  When `strict` is
 * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
 * fields do not satisfy the BCP47 syntax requirement.
 *
 * @param localeID  the input locale ID
 * @param sink      the output sink receiving the BCP47 language
 *                  tag for this Locale.
 * @param strict    boolean value indicating if the function returns
 *                  an error for an ill-formed input locale ID.
 * @param err       error information if receiving the language
 *                  tag failed.
 * @return          The length of the BCP47 language tag.
 *
 * @internal ICU 64
 */
U_STABLE void U_EXPORT2
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      UBool strict,
                      UErrorCode* err);

/**
 * Returns a locale ID for the specified BCP47 language tag string.
 * If the specified language tag contains any ill-formed subtags,
 * the first such subtag and all following subtags are ignored.
 * <p>
 * This implements the 'Language-Tag' production of BCP47, and so
 * supports grandfathered (regular and irregular) as well as private
 * use language tags.  Private use tags are represented as 'x-whatever',
 * and grandfathered tags are converted to their canonical replacements
 * where they exist.  Note that a few grandfathered tags have no modern
 * replacement, these will be converted using the fallback described in
 * the first paragraph, so some information might be lost.
 * @param langtag   the input BCP47 language tag.
 * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
 * @param sink      the output sink receiving a locale ID for the
 *                  specified BCP47 language tag.
 * @param parsedLength  if not NULL, successfully parsed length
 *                      for the input language tag is set.
 * @param err       error information if receiving the locald ID
 *                  failed.
 * @internal ICU 63
 */
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode* err);

/**
 * Get the region to use for supplemental data lookup. Uses
 * (1) any region specified by locale tag "rg"; if none then
 * (2) any unicode_region_tag in the locale ID; if none then
 * (3) if inferRegion is TRUE, the region suggested by
 * getLikelySubtags on the localeID.
 * If no region is found, returns length 0.
 * 
 * @param localeID
 *     The complete locale ID (with keywords) from which
 *     to get the region to use for supplemental data.
 * @param inferRegion
 *     If TRUE, will try to infer region from localeID if
 *     no other region is found.
 * @param region
 *     Buffer in which to put the region ID found; should
 *     have a capacity at least ULOC_COUNTRY_CAPACITY. 
 * @param regionCapacity
 *     The actual capacity of the region buffer.
 * @param status
 *     Pointer to in/out UErrorCode value for latest status.
 * @return
 *     The length of any region code found, or 0 if none.
 * @internal ICU 57
 */
U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
                                     char *region, int32_t regionCapacity, UErrorCode* status);

/**
 * Add the likely subtags for a provided locale ID, per the algorithm described
 * in the following CLDR technical report:
 *
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
 *
 * If localeID is already in the maximal form, or there is no data available
 * for maximization, it will be copied to the output buffer.  For example,
 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
 *
 * Examples:
 *
 * "en" maximizes to "en_Latn_US"
 *
 * "de" maximizes to "de_Latn_US"
 *
 * "sr" maximizes to "sr_Cyrl_RS"
 *
 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
 *
 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
 *
 * @param localeID The locale to maximize
 * @param sink The output sink receiving the maximized locale
 * @param err Error information if maximizing the locale failed.  If the length
 * of the localeID and the null-terminator is greater than the maximum allowed size,
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
 * @internal ICU 64
 */
U_STABLE void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* err);

/**
 * Minimize the subtags for a provided locale ID, per the algorithm described
 * in the following CLDR technical report:
 *
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
 *
 * If localeID is already in the minimal form, or there is no data available
 * for minimization, it will be copied to the output buffer.  Since the
 * minimization algorithm relies on proper maximization, see the comments
 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
 *
 * Examples:
 *
 * "en_Latn_US" minimizes to "en"
 *
 * "de_Latn_US" minimizes to "de"
 *
 * "sr_Cyrl_RS" minimizes to "sr"
 *
 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
 *
 * @param localeID The locale to minimize
 * @param sink The output sink receiving the maximized locale
 * @param err Error information if minimizing the locale failed.  If the length
 * of the localeID and the null-terminator is greater than the maximum allowed size,
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
 * @internal ICU 64
 */
U_STABLE void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
                        UErrorCode* err);

U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);

U_CFUNC UBool
ultag_isExtensionSubtags(const char* s, int32_t len);

U_CFUNC UBool
ultag_isLanguageSubtag(const char* s, int32_t len);

U_CFUNC UBool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len);

U_CFUNC UBool
ultag_isRegionSubtag(const char* s, int32_t len);

U_CFUNC UBool
ultag_isScriptSubtag(const char* s, int32_t len);

U_CFUNC UBool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len);

U_CFUNC UBool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);

U_CFUNC UBool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);

U_CFUNC UBool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);

U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len);

U_CFUNC UBool
ultag_isUnicodeLocaleType(const char* s, int32_t len);

U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len);

U_CFUNC const char*
ulocimp_toBcpKey(const char* key);

U_CFUNC const char*
ulocimp_toLegacyKey(const char* key);

U_CFUNC const char*
ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);

U_CFUNC const char*
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);


// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License



#if U_SHOW_CPLUSPLUS_API


// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// localematcher.h
// created: 2019may08 Markus W. Scherer




#if U_SHOW_CPLUSPLUS_API





/**
 * \file
 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
 */

#ifndef U_HIDE_DRAFT_API

/**
 * Builder option for whether the language subtag or the script subtag is most important.
 *
 * @see Builder#setFavorSubtag(FavorSubtag)
 * @draft ICU 65
 */
enum ULocMatchFavorSubtag {
    /**
     * Language differences are most important, then script differences, then region differences.
     * (This is the default behavior.)
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_LANGUAGE,
    /**
     * Makes script differences matter relatively more than language differences.
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_SCRIPT
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif

/**
 * Builder option for whether all desired locales are treated equally or
 * earlier ones are preferred.
 *
 * @see Builder#setDemotionPerDesiredLocale(Demotion)
 * @draft ICU 65
 */
enum ULocMatchDemotion {
    /**
     * All desired locales are treated equally.
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_NONE,
    /**
     * Earlier desired locales are preferred.
     *
     * <p>From each desired locale to the next,
     * the distance to any supported locale is increased by an additional amount
     * which is at least as large as most region mismatches.
     * A later desired locale has to have a better match with some supported locale
     * due to more than merely having the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code> because
     * with the demotion of sv its perfect match is no better than
     * the region distance between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>In some cases, language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>It is possible for certain region differences to be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *        this is possible in future versions of the data.)
     * </ul>
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_REGION
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif

struct UHashtable;

U_NAMESPACE_BEGIN

struct LSR;

class LocaleDistance;
class LocaleLsrIterator;
class UVector;
class XLikelySubtags;

/**
 * Immutable class that picks the best match between a user's desired locales and
 * an application's supported locales.
 * Movable but not copyable.
 *
 * <p>Example:
 * <pre>
 * UErrorCode errorCode = U_ZERO_ERROR;
 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode);
 * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode);  // "en"
 * </pre>
 *
 * <p>A matcher takes into account when languages are close to one another,
 * such as Danish and Norwegian,
 * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
 *
 * <p>If there are multiple supported locales with the same (language, script, region)
 * likely subtags, then the current implementation returns the first of those locales.
 * It ignores variant subtags (except for pseudolocale variants) and extensions.
 * This may change in future versions.
 *
 * <p>For example, the current implementation does not distinguish between
 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
 *
 * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
 * or place it earlier in the list of supported locales.
 *
 * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
 * The current implementation compares each desired locale with supported locales
 * in the following order:
 * 1. Default locale, if supported;
 * 2. CLDR "paradigm locales" like en-GB and es-419;
 * 3. other supported locales.
 * This may change in future versions.
 *
 * <p>Often a product will just need one matcher instance, built with the languages
 * that it supports. However, it may want multiple instances with different
 * default languages based on additional information, such as the domain.
 *
 * <p>This class is not intended for public subclassing.
 *
 * @draft ICU 65
 */
class U_COMMON_API LocaleMatcher : public UMemory {
public:
    /**
     * Data for the best-matching pair of a desired and a supported locale.
     * Movable but not copyable.
     *
     * @draft ICU 65
     */
    class U_COMMON_API Result : public UMemory {
    public:
        /**
         * Move constructor; might modify the source.
         * This object will have the same contents that the source object had.
         *
         * @param src Result to move contents from.
         * @draft ICU 65
         */
        Result(Result &&src) U_NOEXCEPT;

        /**
         * Destructor.
         *
         * @draft ICU 65
         */
        ~Result();

        /**
         * Move assignment; might modify the source.
         * This object will have the same contents that the source object had.
         *
         * @param src Result to move contents from.
         * @draft ICU 65
         */
        Result &operator=(Result &&src) U_NOEXCEPT;

        /**
         * Returns the best-matching desired locale.
         * nullptr if the list of desired locales is empty or if none matched well enough.
         *
         * @return the best-matching desired locale, or nullptr.
         * @draft ICU 65
         */
        inline const Locale *getDesiredLocale() const { return desiredLocale; }

        /**
         * Returns the best-matching supported locale.
         * If none matched well enough, this is the default locale.
         * The default locale is nullptr if the list of supported locales is empty and
         * no explicit default locale is set.
         *
         * @return the best-matching supported locale, or nullptr.
         * @draft ICU 65
         */
        inline const Locale *getSupportedLocale() const { return supportedLocale; }

        /**
         * Returns the index of the best-matching desired locale in the input Iterable order.
         * -1 if the list of desired locales is empty or if none matched well enough.
         *
         * @return the index of the best-matching desired locale, or -1.
         * @draft ICU 65
         */
        inline int32_t getDesiredIndex() const { return desiredIndex; }

        /**
         * Returns the index of the best-matching supported locale in the
         * constructor’s or builder’s input order (“set” Collection plus “added” locales).
         * If the matcher was built from a locale list string, then the iteration order is that
         * of a LocalePriorityList built from the same string.
         * -1 if the list of supported locales is empty or if none matched well enough.
         *
         * @return the index of the best-matching supported locale, or -1.
         * @draft ICU 65
         */
        inline int32_t getSupportedIndex() const { return supportedIndex; }

        /**
         * Takes the best-matching supported locale and adds relevant fields of the
         * best-matching desired locale, such as the -t- and -u- extensions.
         * May replace some fields of the supported locale.
         * The result is the locale 