From fe4653b02baf68d27ee101234db2b844e53313c6 Mon Sep 17 00:00:00 2001
From: Kenny Root
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ *
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ *
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ *
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation through character strings, regardless of how the
+ * character is stored. For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ *
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ *
+ * In addition to the C++ API defined in this header file, a
+ * plain C API with equivalent functionality is defined in the
+ * file ubrk.h
+ *
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp
+ *
+ */
+class U_COMMON_API BreakIterator : public UObject {
+public:
+ /**
+ * destructor
+ * @stable ICU 2.0
+ */
+ virtual ~BreakIterator();
+
+ /**
+ * Return true if another object is semantically equal to this
+ * one. The other object should be an instance of the same subclass of
+ * BreakIterator. Objects of different subclasses are considered
+ * unequal.
+ *
+ * Return true if this BreakIterator is at the same position in the
+ * same text, and is the same class and type (word, line, etc.) of
+ * BreakIterator, as the argument. Text is considered the same if
+ * it contains the same characters, it need not be the same
+ * object, and styles are not considered.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const BreakIterator&) const = 0;
+
+ /**
+ * Returns the complement of the result of operator==
+ * @param rhs The BreakIterator to be compared for inequality
+ * @return the complement of the result of operator==
+ * @stable ICU 2.0
+ */
+ UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+
+ /**
+ * Return a polymorphic copy of this object. This is an abstract
+ * method which subclasses implement.
+ * @stable ICU 2.0
+ */
+ virtual BreakIterator* clone(void) const = 0;
+
+ /**
+ * Return a polymorphic class ID for this object. Different subclasses
+ * will return distinct unequal values.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Return a CharacterIterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator& getText(void) const = 0;
+
+
+ /**
+ * Get a UText for the text being analyzed.
+ * The returned UText is a shallow clone of the UText used internally
+ * by the break iterator implementation. It can safely be used to
+ * access the text without impacting any break iterator operations,
+ * but the underlying text itself must not be altered.
+ *
+ * @param fillIn A UText to be filled in. If NULL, a new UText will be
+ * allocated to hold the result.
+ * @param status receives any error codes.
+ * @return The current UText for this break iterator. If an input
+ * UText was provided, it will always be returned.
+ * @stable ICU 3.4
+ */
+ virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ * @param text The UnicodeString used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void setText(const UnicodeString &text) = 0;
+
+ /**
+ * Reset the break iterator to operate over the text represented by
+ * the UText. The iterator position is reset to the start.
+ *
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ *
+ * @param text The UText used to change the text.
+ * @param status receives any error codes.
+ * @stable ICU 3.4
+ */
+ virtual void setText(UText *text, UErrorCode &status) = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ * Note that setText(UText *) provides similar functionality to this function,
+ * and is more efficient.
+ * @param it The CharacterIterator used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void adoptText(CharacterIterator* it) = 0;
+
+ enum {
+ /**
+ * DONE is returned by previous() and next() after all valid
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ DONE = (int32_t)-1
+ };
+
+ /**
+ * Return the index of the first character in the text being scanned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t first(void) = 0;
+
+ /**
+ * Return the index immediately BEYOND the last character in the text being scanned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t last(void) = 0;
+
+ /**
+ * Return the boundary preceding the current boundary.
+ * @return The character index of the previous text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t previous(void) = 0;
+
+ /**
+ * Return the boundary following the current boundary.
+ * @return The character index of the next text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(void) = 0;
+
+ /**
+ * Return character index of the current iterator position within the text.
+ * @return The boundary most recently returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t current(void) const = 0;
+
+ /**
+ * Return the first boundary following the specified offset.
+ * The value returned is always greater than the offset or
+ * the value BreakIterator::DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary after the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t following(int32_t offset) = 0;
+
+ /**
+ * Return the first boundary preceding the specified offset.
+ * The value returned is always smaller than the offset or
+ * the value BreakIterator::DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary before the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t preceding(int32_t offset) = 0;
+
+ /**
+ * Return true if the specified position is a boundary position.
+ * As a side effect, the current position of the iterator is set
+ * to the first boundary position at or following the specified offset.
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ virtual UBool isBoundary(int32_t offset) = 0;
+
+ /**
+ * Return the nth boundary from the current boundary
+ * @param n which boundary to return. A value of 0
+ * does nothing. Negative values move to previous boundaries
+ * and positive values move to later boundaries.
+ * @return The index of the nth boundary from the current position, or
+ * DONE if there are fewer than |n| boundaries in the specified direction.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(int32_t n) = 0;
+
+ /**
+ * Create BreakIterator for word-breaks using the given locale.
+ * Returns an instance of a BreakIterator implementing word breaks.
+ * WordBreak is useful for word selection (ex. double click)
+ * @param where the locale.
+ * @param status the error code
+ * @return A BreakIterator for word-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(status). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createWordInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for line-breaks using specified locale.
+ * Returns an instance of a BreakIterator implementing line breaks. Line
+ * breaks are logically possible line breaks, actual line breaks are
+ * usually determined based on display width.
+ * LineBreak is useful for word wrapping text.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for line-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(status). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createLineInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for character-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing character breaks.
+ * Character breaks are boundaries of combining character sequences.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for character-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(status). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createCharacterInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for sentence-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing sentence breaks.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for sentence-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(status). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createSentenceInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for title-casing breaks using the specified locale
+ * Returns an instance of a BreakIterator implementing title breaks.
+ * The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use the word boundary iterator; see {@link #createWordInstance}.
+ *
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for title-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(status). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.1
+ */
+ static BreakIterator* U_EXPORT2
+ createTitleInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Get the set of Locales for which TextBoundaries are installed.
+ * Note: this will not return locales added through the register
+ * call. To see the registered locales too, use the getAvailableLocales
+ * function that returns a StringEnumeration object.
+ *
+ * Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type UChar32 for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.
+ *
+ * ForwardCharacterIterator provides nextPostInc() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to "return text[position++]".
+ *
+ * next32PostInc() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After next32PostInc(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by next32PostInc().
+ *
+ * For iteration with either function, there are two ways to
+ * check for the end of the iteration. When there are no more
+ * characters in the text object:
+ * - hasNext() returns FALSE.
+ * - nextPostInc() and next32PostInc() return DONE.
+ *
+ * Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ * Note, CanonicalIterator is not intended to be subclassed.
+ * @author M. Davis
+ * @author C++ port by V. Weinstein
+ * @stable ICU 2.4
+ */
+class U_COMMON_API CanonicalIterator : public UObject {
+public:
+ /**
+ * Construct a CanonicalIterator object for the given source string.
+ * Successive calls to next() then return the strings that are
+ * canonically equivalent to it.
+ * @param source string to get results for
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ CanonicalIterator(const UnicodeString &source, UErrorCode &status);
+
+ /** Destructor.
+ * Cleans pieces.
+ * @stable ICU 2.4
+ */
+ virtual ~CanonicalIterator();
+
+ /**
+ * Gets the NFD form of the current source we are iterating over.
+ * @return the source string, returned by value. NOTE: it is the NFD
+ * form of the source.
+ * @stable ICU 2.4
+ */
+ UnicodeString getSource();
+
+ /**
+ * Resets the iterator so that one can start again from the beginning.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * Get the next canonically equivalent string.
+ *
+ * Warning: The strings are not guaranteed to be in any particular order.
+ * @return the next string that is canonically equivalent. A bogus string is returned when
+ * the iteration is done.
+ * @stable ICU 2.4
+ */
+ UnicodeString next();
+
+ /**
+ * Set a new source for this iterator. Allows object reuse.
+ * @param newSource the source string to iterate against. This allows the same iterator to be used
+ * while changing the source string, saving object creation.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ void setSource(const UnicodeString &newSource, UErrorCode &status);
+
+ /**
+ * Dumb recursive implementation of permutation.
+ * TODO: optimize
+ * @param source the string to find permutations for
+ * @param skipZeros whether to skip zeros.
+ * NOTE(review): what counts as a "zero" is not visible from this
+ * declaration; confirm against the implementation.
+ * @param result receives the results; the Hashtable is used as a set.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @internal
+ */
+ static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ // ===================== PRIVATES ==============================
+ // private default constructor: clients must use the (source, status) constructor
+ CanonicalIterator();
+
+
+ /**
+ * Copy constructor. Private for now.
+ * Declared in the private section, so client code cannot copy-construct
+ * a CanonicalIterator.
+ * @internal
+ */
+ CanonicalIterator(const CanonicalIterator& other);
+
+ /**
+ * Assignment operator. Private for now.
+ * Declared in the private section, so client code cannot assign one
+ * CanonicalIterator to another.
+ * @internal
+ */
+ CanonicalIterator& operator=(const CanonicalIterator& other);
+
+ // fields
+ UnicodeString source;
+ UBool done;
+
+ // 2 dimensional array holds the pieces of the string with
+ // their different canonically equivalent representations
+ UnicodeString **pieces;
+ int32_t pieces_length;
+ int32_t *pieces_lengths;
+
+ // current is used in iterating to combine pieces
+ int32_t *current;
+ int32_t current_length;
+
+ // transient fields
+ UnicodeString buffer;
+
+ // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+ UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
+
+ //Set getEquivalents2(String segment);
+ Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
+ //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
+
+ /**
+ * See if the decomposition of cp2 is at segment starting at segmentPos
+ * (with canonical rearrangement!)
+ * If so, take the remainder, and return the equivalents
+ * NOTE(review): "cp2" presumably refers to the comp parameter; confirm
+ * against the implementation.
+ */
+ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
+ Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+ //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+
+ void cleanPieces();
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/chariter.h b/jni/EastAsianWidth/unicode/chariter.h
new file mode 100644
index 0000000..12fc924
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/chariter.h
@@ -0,0 +1,716 @@
+/*
+********************************************************************
+*
+* Copyright (C) 1997-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+********************************************************************
+*/
+
+#ifndef CHARITER_H
+#define CHARITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+/**
+ * \file
+ * \brief C++ API: Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * Abstract class that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
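+ * ForwardCharacterIterator provides nextPostInc() to access a code unit
+ * and advance an internal position into the text object, similar to
+ * "return text[position++]".
+ * It provides next32PostInc() to access a code point and advance an internal
+ * position.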
+ *
+ *
+ * Example:
+ * \code
+ * void function1(ForwardCharacterIterator &it) {
+ * UChar32 c;
+ * while(it.hasNext()) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ *
+ * void function2(ForwardCharacterIterator &it) {
+ * UChar c;
+ * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ *
Despite the fact that this function is public, + * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! + * @return a UClassID for this ForwardCharacterIterator + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Gets the current code unit for returning and advances to the next code unit + * in the iteration range + * (toward endIndex()). If there are + * no more code units to return, returns DONE. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual UChar nextPostInc(void) = 0; + + /** + * Gets the current code point for returning and advances to the next code point + * in the iteration range + * (toward endIndex()). If there are + * no more code points to return, returns DONE. + * @return the current code point. + * @stable ICU 2.0 + */ + virtual UChar32 next32PostInc(void) = 0; + + /** + * Returns FALSE if there are no more code units or code points + * at or after the current position in the iteration range. + * This is used with nextPostInc() or next32PostInc() in forward + * iteration. + * @returns FALSE if there are no more code units or code points + * at or after the current position in the iteration range. + * @stable ICU 2.0 + */ + virtual UBool hasNext() = 0; + +protected: + /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(); + + /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(const ForwardCharacterIterator &other); + + /** + * Assignment operator to be overridden in the implementing class. + * @stable ICU 2.0 + */ + ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; } +}; + +/** + * Abstract class that defines an API for iteration + * on text objects. + * This is an interface for forward and backward iteration + * and random access into a text object. + * + *
+ * The API provides backward compatibility to the Java and older ICU
+ * CharacterIterator classes but extends them significantly:
+ *
+ * Examples for some of the new functions:
+ *
+ * Forward iteration with hasNext():
+ * \code
+ * void forward1(CharacterIterator &it) {
+ *     UChar32 c;
+ *     for(it.setToStart(); it.hasNext();) {
+ *         c=it.next32PostInc();
+ *         // use c
+ *     }
+ * }
+ * \endcode
+ * Forward iteration more similar to loops with the old forward iteration,
+ * showing a way to convert simple for() loops:
+ * \code
+ * void forward2(CharacterIterator &it) {
+ *     UChar c;
+ *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
+ *         // use c
+ *     }
+ * }
+ * \endcode
+ * Backward iteration with setToEnd() and hasPrevious():
+ * \code
+ * void backward1(CharacterIterator &it) {
+ *     UChar32 c;
+ *     for(it.setToEnd(); it.hasPrevious();) {
+ *         c=it.previous32();
+ *         // use c
+ *     }
+ * }
+ * \endcode
+ * Backward iteration with a more traditional for() loop:
+ * \code
+ * void backward2(CharacterIterator &it) {
+ *     UChar c;
+ *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
+ *         // use c
+ *     }
+ * }
+ * \endcode
+ *
+ * Example for random access:
+ * \code
+ * void random(CharacterIterator &it) {
+ *     // set to the third code point from the beginning
+ *     it.move32(3, CharacterIterator::kStart);
+ *     // get a code point from here without moving the position
+ *     UChar32 c=it.current32();
+ *     // get the position
+ *     int32_t pos=it.getIndex();
+ *     // get the previous code unit
+ *     UChar u=it.previous();
+ *     // move back one more code unit
+ *     it.move(-1, CharacterIterator::kCurrent);
+ *     // set the position back to where it was
+ *     // and read the same code point c and move beyond it
+ *     it.setIndex(pos);
+ *     if(c!=it.next32PostInc()) {
+ *         exit(1); // CharacterIterator inconsistent
+ *     }
+ * }
+ * \endcode
+ *
+ * Examples, especially for the old API:
+ *
+ * Function processing characters, in this example simple output
+ *
+ * \code
+ * void processChar( UChar c )
+ * {
+ * cout << " " << c;
+ * }
+ * \endcode
+ *
+ * Traverse the text from start to finish
+ *
+ * \code
+ * void traverseForward(CharacterIterator& iter)
+ * {
+ * for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ *
+ * Traverse the text backwards, from end to start
+ *
+ * \code
+ * void traverseBackward(CharacterIterator& iter)
+ * {
+ * for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ *
+ * Traverse both forward and backward from a given position in the text.
+ * Calls to notBoundary() in this example represents some additional stopping criteria.
+ *
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ * UChar c;
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.next()) {}
+ * int32_t end = iter.getIndex();
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.previous()) {}
+ * int32_t start = iter.getIndex() + 1;
+ *
+ * cout << "start: " << start << " end: " << end << endl;
+ * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ *
+ * Creating a StringCharacterIterator and calling the test functions
+ *
+ * \code
+ * void CharacterIterator_Example( void )
+ * {
+ * cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ * UnicodeString text("Ein kleiner Satz.");
+ * StringCharacterIterator iterator(text);
+ * cout << "----- traverseForward: -----------" << endl;
+ * traverseForward( iterator );
+ * cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ * traverseBackward( iterator );
+ * cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ * traverseOut( iterator, 7 );
+ * cout << endl << endl << "-----" << endl;
+ * }
+ * \endcode
+ *
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
+public:
+ /**
+ * Origin enumeration for the move() and move32() functions.
+ * @stable ICU 2.0
+ */
+ enum EOrigin { kStart, kCurrent, kEnd };
+
+ /**
+ * Returns a pointer to a new CharacterIterator of the same
+ * concrete class as this one, and referring to the same
+ * character in the same text-storage object as this one. The
+ * caller is responsible for deleting the new clone.
+ * @return a pointer to a new CharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone(void) const = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar first(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit or code point in its
+ * iteration range. This can be used to begin a forward
+ * iteration with nextPostInc() or next32PostInc().
+ * @return the start position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToStart();
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar last(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) = 0;
+
+ /**
+ * Sets the iterator to the end of its iteration range, just behind
+ * the last code unit or code point. This can be used to begin a backward
+ * iteration with previous() or previous32().
+ * @return the end position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToEnd();
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar setIndex(int32_t position) = 0;
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) = 0;
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar current(void) const = 0;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const = 0;
+
+ /**
+ * Advances to the next code unit in the iteration range
+ * (toward endIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the next code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar next(void) = 0;
+
+ /**
+ * Advances to the next code point in the iteration range
+ * (toward endIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) = 0;
+
+ /**
+ * Advances to the previous code unit in the iteration range
+ * (toward startIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the previous code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar previous(void) = 0;
+
+ /**
+ * Advances to the previous code point in the iteration range
+ * (toward startIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * @return the previous code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) = 0;
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * before the current position in the iteration range, return TRUE otherwise.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() = 0;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character returned by first(). Since it's
+ * possible to create an iterator that iterates across only
+ * part of a text-storage object, this number isn't
+ * necessarily 0.
+ * @returns the numeric index in the underlying text-storage
+ * object of the character returned by first().
+ * @stable ICU 2.0
+ */
+ inline int32_t startIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @return the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @stable ICU 2.0
+ */
+ inline int32_t endIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character the iterator currently refers to
+ * (i.e., the character returned by current()).
+ * @return the numeric index in the text-storage object of
+ * the character the iterator currently refers to
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Returns the length of the entire text in the underlying
+ * text-storage object.
+ * @return the length of the entire text in the text-storage object
+ * @stable ICU 2.0
+ */
+ inline int32_t getLength() const;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Copies the text under iteration into the UnicodeString
+ * referred to by "result".
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) = 0;
+
+protected:
+ /**
+ * Empty constructor.
+ * @stable ICU 2.0
+ */
+ CharacterIterator();
+
+ /**
+ * Constructor, just setting the length field in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length);
+
+ /**
+ * Constructor, just setting the length and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t position);
+
+ /**
+ * Constructor, just setting the length, start, end, and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+
+ /**
+ * Copy constructor.
+ *
+ * @param that The CharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ CharacterIterator(const CharacterIterator &that);
+
+ /**
+ * Assignment operator. Sets this CharacterIterator to have the same behavior
+ * as the one passed in.
+ * @param that The CharacterIterator passed in.
+ * @return the newly set CharacterIterator.
+ * @stable ICU 2.0
+ */
+ CharacterIterator &operator=(const CharacterIterator &that);
+
+ /**
+ * Base class text length field.
+ * Necessary for correct getText() and hashCode().
+ * @stable ICU 2.0
+ */
+ int32_t textLength;
+
+ /**
+ * Base class field for the current position.
+ * @stable ICU 2.0
+ */
+ int32_t pos;
+
+ /**
+ * Base class field for the start of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t begin;
+
+ /**
+ * Base class field for the end of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t end;
+};
+
+inline UBool
+ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
+ return !operator==(that); // inequality is the logical negation of operator==
+}
+
+inline int32_t
+CharacterIterator::setToStart() {
+ return move(0, kStart); // zero delta from the kStart origin; yields the position move() reports
+}
+
+inline int32_t
+CharacterIterator::setToEnd() {
+ return move(0, kEnd); // zero delta from the kEnd origin; yields the position move() reports
+}
+
+inline int32_t
+CharacterIterator::startIndex(void) const {
+ return begin; // base-class field: start of the iteration range
+}
+
+inline int32_t
+CharacterIterator::endIndex(void) const {
+ return end; // base-class field: end of the iteration range
+}
+
+inline int32_t
+CharacterIterator::getIndex(void) const {
+ return pos; // base-class field: current position
+}
+
+inline int32_t
+CharacterIterator::getLength(void) const {
+ return textLength; // base-class field: text length
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/jni/EastAsianWidth/unicode/dbbi.h b/jni/EastAsianWidth/unicode/dbbi.h
new file mode 100644
index 0000000..c7984ef
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/dbbi.h
@@ -0,0 +1,41 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2006 IBM Corp. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 12/1/99 rgillam Complete port from Java.
+* 01/13/2000 helena Added UErrorCode to ctors.
+**********************************************************************
+*/
+
+#ifndef DBBI_H
+#define DBBI_H
+
+#include "unicode/rbbi.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * \file
+ * \brief C++ API: Dictionary Based Break Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary-
+ * based break iteration has been folded into the base class. This class
+ * is deprecated as of ICU 3.6.
+ */
+
+#ifndef U_HIDE_DEPRECATED_API
+
+typedef RuleBasedBreakIterator DictionaryBasedBreakIterator; /* obsolete alias, deprecated as of ICU 3.6 */
+
+#endif /* #ifndef U_HIDE_DEPRECATED_API */
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/docmain.h b/jni/EastAsianWidth/unicode/docmain.h
new file mode 100644
index 0000000..973ebea
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/docmain.h
@@ -0,0 +1,202 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2007, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ * FILE NAME: DOCMAIN.h
+ *
+ * Date Name Description
+ * 12/11/2000 Ram Creation.
+ */
+
+/* This file contains documentation for Doxygen and does not have
+ * any significance with respect to C or C++ API
+ */
+
+/*! \mainpage
+ *
+ * \section API API Reference Usage
+ *
+ * Use Class Hierarchy or Alphabetical List + * or Compound List + * to find the class you are interested in. For example, to find BreakIterator, + * you can go to the Alphabetical List, then click on + * "BreakIterator". Once you are at the class, you will find an inheritance + * chart, a list of the public members, a detailed description of the class, + * then detailed member descriptions.
+ * + *Use Module List or File Members + * to find a list of all the functions and constants. + * For example, to find BreakIterator functions you would click on + * File List, + * then find "ubrk.h" and click on it. You will find descriptions of Defines, + * Typedefs, Enumerations, and Functions, with detailed descriptions below. + * If you want to find a specific function, such as ubrk_next(), then click + * first on File Members, then use your browser + * Find dialog to search for "ubrk_next()".
+ * + * + *The API References for each release of ICU are also available as + * a zip file from the ICU + * download page.
+ * + *| Module Name | + *C | + *C++ | + *
| Basic Types and Constants | + *utypes.h | + *utypes.h | + *
| Strings and Character Iteration | + *ustring.h, utf.h | + *UnicodeString, CharacterIterator | + *
| Unicode Character Properties and Names |
+ * uchar.h | + *uchar.h C API | + *
| Codepage Conversion | + *ucnv.h | + *ucnv.h C API | + *
| Unicode Text Compression | + *ucnv.h (encoding name "SCSU" or "BOCU-1") |
+ * ucnv.h C API | + *
| Locales | + *uloc.h | + *Locale | + *
| Resource Bundles | + *ures.h | + *ResourceBundle | + *
| Normalization | + *unorm.h | + *Normalizer | + *
| Calendars | + *ucal.h | + *Calendar | + *
| Date and Time Formatting | + *udat.h | + *DateFormat | + *
| Message Formatting | + *umsg.h | + *MessageFormat | + *
| Number Formatting | + *unum.h | + *NumberFormat | + *
| Number Spellout (Rule Based Number Formatting) |
+ * unum.h (use UNUM_SPELLOUT) |
+ * RuleBasedNumberFormat | + *
| Text Transformation (Transliteration) |
+ * utrans.h | + *Transliterator | + *
| Bidirectional Algorithm | + *ubidi.h | + *ubidi.h C API | + *
| Arabic Shaping | + *ushape.h | + *ushape.h C API | + *
| Collation | + *ucol.h | + *Collator | + *
| String Searching | + *usearch.h | + *StringSearch | + *
| Text Boundary Analysis (Break Iteration) |
+ * ubrk.h | + *BreakIterator | + *
| Unicode Set | + *uset.h | + *UnicodeSet | + *
| Regular Expressions | + *uregex.h | + *RegexPattern, RegexMatcher | + *
| StringPrep | + *usprep.h | + *usprep.h C API | + *
| International Domain Names in Applications | + *uidna.h | + *uidna.h C API | + *
| Universal Time Scale | + *utmscale.h | + *utmscale.h C API | + *
| Basic Layout Engine Types and Constants | + *(no C API) | + *LETypes.h | + *
| Complex Text Layout | + *(no C API) | + *LayoutEngine, ParagraphLayout | + *
| ICU I/O | + *ustdio.h | + *ustream.h | + *
Locale object represents a specific geographical, political,
+ * or cultural region. An operation that requires a Locale to perform
+ * its task is called locale-sensitive and uses the Locale
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture.
+ *
+ * The Locale class is not suitable for subclassing.
+ *
+ *
+ * You can create a Locale object using the constructor in
+ * this class:
+ * \htmlonly
\endhtmlonly + *\endhtmlonly + * The first argument to the constructors is a valid ISO + * Language Code. These codes are the lower-case two-letter + * codes as defined by ISO-639. + * You can find a full list of these codes at: + *+ * Locale( const char* language, + * const char* country, + * const char* variant); + *+ * \htmlonly
+ * The second argument to the constructors is a valid ISO Country
+ * Code. These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ *
+ * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html
+ *
+ *
+ * The third constructor requires a third argument--the Variant. + * The Variant codes are vendor and browser-specific. + * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX. + * Where there are two variants, separate them with an underscore, and + * put the most important one first. For + * example, a Traditional Spanish collation might be referenced, with + * "ES", "ES", "Traditional_POSIX". + * + *
+ * Because a Locale object is just an identifier for a region,
+ * no validity check is performed when you construct a Locale.
+ * If you want to see whether particular resources are available for the
+ * Locale you construct, you must query those resources. For
+ * example, ask the NumberFormat for the locales it supports
+ * using its getAvailableLocales method.
+ *
Note: When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * ResourceBundle.
+ *
+ *
+ * The Locale class provides a number of convenient constants
+ * that you can use to create Locale objects for commonly used
+ * locales. For example, the following refers to a Locale object
+ * for the United States:
+ * \htmlonly
\endhtmlonly + *\endhtmlonly + * + *+ * Locale::getUS() + *+ * \htmlonly
+ * Once you've created a Locale you can query it for information about
+ * itself. Use getCountry to get the ISO Country Code and
+ * getLanguage to get the ISO Language Code. You can
+ * use getDisplayCountry to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use getDisplayLanguage to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the getDisplayXXX methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ *
+ * ICU provides a number of classes that perform locale-sensitive
+ * operations. For example, the NumberFormat class formats
+ * numbers, currency, or percentages in a locale-sensitive manner. Classes
+ * such as NumberFormat have a number of convenience methods
+ * for creating a default object of that type. For example, the
+ * NumberFormat class provides these three convenience methods
+ * for creating a default NumberFormat object:
+ * \htmlonly
\endhtmlonly + *\endhtmlonly + * Each of these methods has two variants; one with an explicit locale + * and one without; the latter using the default locale. + * \htmlonly+ * UErrorCode success = U_ZERO_ERROR; + * Locale myLocale; + * NumberFormat *nf; + * + * nf = NumberFormat::createInstance( success ); delete nf; + * nf = NumberFormat::createCurrencyInstance( success ); delete nf; + * nf = NumberFormat::createPercentInstance( success ); delete nf; + *+ * \htmlonly
\endhtmlonly + *\endhtmlonly + * A+ * nf = NumberFormat::createInstance( myLocale, success ); delete nf; + * nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf; + * nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf; + *+ * \htmlonly
Locale is the mechanism for identifying the kind of object
+ * (NumberFormat) that you would like to get. The locale is
+ * just a mechanism for identifying objects,
+ * not a container for the objects themselves.
+ *
+ * + * Each class that performs locale-sensitive operations allows you + * to get all the available objects of that type. You can sift + * through these objects by language, country, or variant, + * and use the display names to present a menu to the user. + * For example, you can create a menu of all the collation objects + * suitable for a given language. Such classes implement these + * three class methods: + * \htmlonly
\endhtmlonly + *\endhtmlonly + * + * @stable ICU 2.0 + * @see ResourceBundle + */ +U_NAMESPACE_BEGIN +class U_COMMON_API Locale : public UObject { +public: + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getEnglish(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getFrench(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getGerman(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getItalian(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getJapanese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getKorean(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getChinese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getSimplifiedChinese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getTraditionalChinese(void); + + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getFrance(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getGermany(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getItaly(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getJapan(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getKorea(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getChina(void); + /** Useful constant for this country/region. 
@stable ICU 2.0 */ + static const Locale &U_EXPORT2 getPRC(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getTaiwan(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getUK(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getUS(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getCanada(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getCanadaFrench(void); + + + /** + * Construct a default locale object, a Locale for the default locale ID. + * + * @see getDefault + * @see uloc_getDefault + * @stable ICU 2.0 + */ + Locale(); + + /** + * Construct a locale from language, country, variant. + * If an error occurs, then the constructed object will be "bogus" + * (isBogus() will return TRUE). + * + * @param language Lowercase two-letter or three-letter ISO-639 code. + * This parameter can instead be an ICU style C locale (e.g. "en_US"), + * but the other parameters must not be used. + * This parameter can be NULL; if so, + * the locale is initialized to match the current default locale. + * (This is the same as using the default constructor.) + * Please note: The Java Locale class does NOT accept the form + * 'new Locale("en_US")' but only 'new Locale("en","US")' + * + * @param country Uppercase two-letter ISO-3166 code. (optional) + * @param variant Uppercase vendor and browser specific code. See class + * description. 
(optional) + * @param keywordsAndValues A string consisting of keyword/values pairs, such as + * "collation=phonebook;currency=euro" + * + * @see getDefault + * @see uloc_getDefault + * @stable ICU 2.0 + */ + Locale( const char * language, + const char * country = 0, + const char * variant = 0, + const char * keywordsAndValues = 0); + + /** + * Initializes a Locale object from another Locale object. + * + * @param other The Locale object being copied in. + * @stable ICU 2.0 + */ + Locale(const Locale& other); + + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~Locale() ; + + /** + * Replaces the entire contents of *this with the specified value. + * + * @param other The Locale object being copied in. + * @return *this + * @stable ICU 2.0 + */ + Locale& operator=(const Locale& other); + + /** + * Checks if two locale keys are the same. + * + * @param other The locale key object to be compared with this. + * @return True if the two locale keys are the same, false otherwise. + * @stable ICU 2.0 + */ + UBool operator==(const Locale& other) const; + + /** + * Checks if two locale keys are not the same. + * + * @param other The locale key object to be compared with this. + * @return True if the two locale keys are not the same, false + * otherwise. + * @stable ICU 2.0 + */ + UBool operator!=(const Locale& other) const; + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + Locale *clone() const; + + /** + * Common methods of getting the current default Locale. Used for the + * presentation: menus, dialogs, etc. Generally set once when your applet or + * application is initialized, then never reset. (If you do reset the + * default locale, you probably want to reload your GUI, so that the change + * is reflected in your interface.) 
+ * + * More advanced programs will allow users to use different locales for + * different fields, e.g. in a spreadsheet. + * + * Note that the initial setting will match the host system. + * @return a reference to the Locale object for the default locale ID + * @system + * @stable ICU 2.0 + */ + static const Locale& U_EXPORT2 getDefault(void); + + /** + * Sets the default. Normally set once at the beginning of a process, + * then never reset. + * setDefault() only changes ICU's default locale ID, not + * the default locale ID of the runtime environment. + * + * @param newLocale Locale to set to. If NULL, set to the value obtained + * from the runtime environement. + * @param success The error code. + * @system + * @stable ICU 2.0 + */ + static void U_EXPORT2 setDefault(const Locale& newLocale, + UErrorCode& success); + + /** + * Creates a locale which has had minimal canonicalization + * as per uloc_getName(). + * @param name The name to create from. If name is null, + * the default Locale is used. + * @return new locale object + * @stable ICU 2.0 + * @see uloc_getName + */ + static Locale U_EXPORT2 createFromName(const char *name); + + /** + * Creates a locale from the given string after canonicalizing + * the string by calling uloc_canonicalize(). + * @param name the locale ID to create from. Must not be NULL. + * @return a new locale object corresponding to the given name + * @stable ICU 3.0 + * @see uloc_canonicalize + */ + static Locale U_EXPORT2 createCanonical(const char* name); + + /** + * Returns the locale's ISO-639 language code. + * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getLanguage( ) const; + + /** + * Returns the locale's ISO-15924 abbreviation script code. + * @return An alias to the code + * @see uscript_getShortName + * @see uscript_getCode + * @stable ICU 2.8 + */ + inline const char * getScript( ) const; + + /** + * Returns the locale's ISO-3166 country code. 
+ * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getCountry( ) const; + + /** + * Returns the locale's variant code. + * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getVariant( ) const; + + /** + * Returns the programmatic name of the entire locale, with the language, + * country and variant separated by underbars. If a field is missing, up + * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN", + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" + * @return A pointer to "name". + * @stable ICU 2.0 + */ + inline const char * getName() const; + + /** + * Returns the programmatic name of the entire locale as getName would return, + * but without keywords. + * @return A pointer to "name". + * @see getName + * @stable ICU 2.8 + */ + const char * getBaseName() const; + + + /** + * Gets the list of keywords for the specified locale. + * + * @return pointer to StringEnumeration class. Client must dispose of it by calling delete. + * @param status Returns any error information while performing this operation. + * @stable ICU 2.8 + */ + StringEnumeration * createKeywords(UErrorCode &status) const; + + /** + * Get the value for a keyword. + * + * @param keywordName name of the keyword for which we want the value. Case insensitive. + * @param status Returns any error information while performing this operation. + * @param buffer The buffer to receive the keyword value. + * @param bufferCapacity The capacity of receiving buffer + * @return the length of keyword value + * + * @stable ICU 2.8 + */ + int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const; + + /** + * returns the locale's three-letter language code, as specified + * in ISO draft standard ISO-639-2. 
+ * @return An alias to the code, or NULL + * @stable ICU 2.0 + */ + const char * getISO3Language() const; + + /** + * Fills in "name" with the locale's three-letter ISO-3166 country code. + * @return An alias to the code, or NULL + * @stable ICU 2.0 + */ + const char * getISO3Country() const; + + /** + * Returns the Windows LCID value corresponding to this locale. + * This value is stored in the resource data for the locale as a one-to-four-digit + * hexadecimal number. If the resource is missing, in the wrong format, or + * there is no Windows LCID value that corresponds to this locale, returns 0. + * @stable ICU 2.0 + */ + uint32_t getLCID(void) const; + + /** + * Fills in "dispLang" with the name of this locale's language in a format suitable for + * user display in the default locale. For example, if the locale's language code is + * "fr" and the default locale's language code is "en", this function would set + * dispLang to "French". + * @param dispLang Receives the language's display name. + * @return A reference to "dispLang". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const; + + /** + * Fills in "dispLang" with the name of this locale's language in a format suitable for + * user display in the locale specified by "displayLocale". For example, if the locale's + * language code is "en" and displayLocale's language code is "fr", this function would set + * dispLang to "Anglais". + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * displayLocale would result in "Anglais", while passing Locale::getGerman() + * for displayLocale would result in "Englisch". + * @param dispLang Receives the language's display name. + * @return A reference to "dispLang". 
+ * @stable ICU 2.0 + */ + UnicodeString& getDisplayLanguage( const Locale& displayLocale, + UnicodeString& dispLang) const; + + /** + * Fills in "dispScript" with the name of this locale's script in a format suitable + * for user display in the default locale. For example, if the locale's script code + * is "LATN" and the default locale's language code is "en", this function would set + * dispScript to "Latin". + * @param dispScript Receives the scripts's display name. + * @return A reference to "dispScript". + * @stable ICU 2.8 + */ + UnicodeString& getDisplayScript( UnicodeString& dispScript) const; + + /** + * Fills in "dispScript" with the name of this locale's country in a format suitable + * for user display in the locale specified by "displayLocale". For example, if the locale's + * script code is "LATN" and displayLocale's language code is "en", this function would set + * dispScript to "Latin". + * @param displayLocale Specifies the locale to be used to display the name. In other + * words, if the locale's script code is "LATN", passing + * Locale::getFrench() for displayLocale would result in "", while + * passing Locale::getGerman() for displayLocale would result in + * "". + * @param dispScript Receives the scripts's display name. + * @return A reference to "dispScript". + * @stable ICU 2.8 + */ + UnicodeString& getDisplayScript( const Locale& displayLocale, + UnicodeString& dispScript) const; + + /** + * Fills in "dispCountry" with the name of this locale's country in a format suitable + * for user display in the default locale. For example, if the locale's country code + * is "FR" and the default locale's language code is "en", this function would set + * dispCountry to "France". + * @param dispCountry Receives the country's display name. + * @return A reference to "dispCountry". 
+ * @stable ICU 2.0 + */ + UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const; + + /** + * Fills in "dispCountry" with the name of this locale's country in a format suitable + * for user display in the locale specified by "displayLocale". For example, if the locale's + * country code is "US" and displayLocale's language code is "fr", this function would set + * dispCountry to "États-Unis". + * @param displayLocale Specifies the locale to be used to display the name. In other + * words, if the locale's country code is "US", passing + * Locale::getFrench() for displayLocale would result in "États-Unis", while + * passing Locale::getGerman() for displayLocale would result in + * "Vereinigte Staaten". + * @param dispCountry Receives the country's display name. + * @return A reference to "dispCountry". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayCountry( const Locale& displayLocale, + UnicodeString& dispCountry) const; + + /** + * Fills in "dispVar" with the name of this locale's variant code in a format suitable + * for user display in the default locale. + * @param dispVar Receives the variant's name. + * @return A reference to "dispVar". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayVariant( UnicodeString& dispVar) const; + + /** + * Fills in "dispVar" with the name of this locale's variant code in a format + * suitable for user display in the locale specified by "displayLocale". + * @param displayLocale Specifies the locale to be used to display the name. + * @param dispVar Receives the variant's display name. + * @return A reference to "dispVar". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayVariant( const Locale& displayLocale, + UnicodeString& dispVar) const; + + /** + * Fills in "name" with the name of this locale in a format suitable for user display + * in the default locale. 
This function uses getDisplayLanguage(), getDisplayCountry(), + * and getDisplayVariant() to do its work, and outputs the display name in the format + * "language (country[,variant])". For example, if the default locale is en_US, then + * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name + * would be "Spanish (Mexico,Traditional)". + * @param name Receives the locale's display name. + * @return A reference to "name". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName( UnicodeString& name) const; + + /** + * Fills in "name" with the name of this locale in a format suitable for user display + * in the locale specfied by "displayLocale". This function uses getDisplayLanguage(), + * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display + * name in the format "language (country[,variant])". For example, if displayLocale is + * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's + * display name would be "norvégien (Norvège,NY)". + * @param displayLocale Specifies the locale to be used to display the name. + * @param name Receives the locale's display name. + * @return A reference to "name". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName( const Locale& displayLocale, + UnicodeString& name) const; + + /** + * Generates a hash code for the locale. + * @stable ICU 2.0 + */ + int32_t hashCode(void) const; + + /** + * Sets the locale to bogus + * A bogus locale represents a non-existing locale associated + * with services that can be instantiated from non-locale data + * in addition to locale (for example, collation can be + * instantiated from a locale and from a rule set). + * @stable ICU 2.1 + */ + void setToBogus(); + + /** + * Gets the bogus state. 
Locale object can be bogus if it doesn't exist + * @return FALSE if it is a real locale, TRUE if it is a bogus locale + * @stable ICU 2.1 + */ + UBool isBogus(void) const; + + /** + * Returns a list of all installed locales. + * @param count Receives the number of locales in the list. + * @return A pointer to an array of Locale objects. This array is the list + * of all locales with installed resource files. The called does NOT + * get ownership of this list, and must NOT delete it. + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Gets a list of all available 2-letter country codes defined in ISO 639. This is a + * pointer to an array of pointers to arrays of char. All of these pointers are + * owned by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available country codes + * @stable ICU 2.0 + */ + static const char* const* U_EXPORT2 getISOCountries(); + + /** + * Gets a list of all available language codes defined in ISO 639. This is a pointer + * to an array of pointers to arrays of char. All of these pointers are owned + * by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available language codes + * @stable ICU 2.0 + */ + static const char* const* U_EXPORT2 getISOLanguages(); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +protected: /* only protected for testing purposes. DO NOT USE. */ + /** + * Set this from a single POSIX style locale string. + * @internal + */ + void setFromPOSIXID(const char *posixID); + +private: + /** + * Initialize the locale object with a new name. 
+ * Was deprecated - used in implementation - moved internal + * + * @param cLocaleID The new locale name. + */ + Locale& init(const char* cLocaleID, UBool canonicalize); + + /* + * Internal constructor to allow construction of a locale object with + * NO side effects. (Default constructor tries to get + * the default locale.) + */ + enum ELocaleType { + eBOGUS + }; + Locale(ELocaleType); + + /** + * Initialize the locale cache for commonly used locales + */ + static Locale *getLocaleCache(void); + + char language[ULOC_LANG_CAPACITY]; + char script[ULOC_SCRIPT_CAPACITY]; + char country[ULOC_COUNTRY_CAPACITY]; + int32_t variantBegin; + char* fullName; + char fullNameBuffer[ULOC_FULLNAME_CAPACITY]; + // name without keywords + char* baseName; + char baseNameBuffer[ULOC_FULLNAME_CAPACITY]; + + UBool fIsBogus; + + static const Locale &getLocale(int locid); + + /** + * A friend to allow the default locale to be set by either the C or C++ API. + * @internal + */ + friend void locale_set_default_internal(const char *); +}; + +inline UBool +Locale::operator!=(const Locale& other) const +{ + return !operator==(other); +} + +inline const char * +Locale::getCountry() const +{ + return country; +} + +inline const char * +Locale::getLanguage() const +{ + return language; +} + +inline const char * +Locale::getScript() const +{ + return script; +} + +inline const char * +Locale::getVariant() const +{ + return &fullName[variantBegin]; +} + +inline const char * +Locale::getName() const +{ + return fullName; +} + +inline UBool +Locale::isBogus(void) const { + return fIsBogus; +} + +U_NAMESPACE_END + +#endif + diff --git a/jni/EastAsianWidth/unicode/normlzr.h b/jni/EastAsianWidth/unicode/normlzr.h new file mode 100644 index 0000000..7974f1a --- /dev/null +++ b/jni/EastAsianWidth/unicode/normlzr.h @@ -0,0 +1,823 @@ +/* + ******************************************************************** + * COPYRIGHT: + * Copyright (c) 1996-2006, International Business Machines Corporation and + * 
others. All Rights Reserved. + ******************************************************************** + */ + +#ifndef NORMLZR_H +#define NORMLZR_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Unicode Normalization + */ + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/unorm.h" + + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ + +U_NAMESPACE_BEGIN +/** + * The Normalizer class supports the standard normalization forms described in + * + * Unicode Standard Annex #15: Unicode Normalization Forms. + * + * The Normalizer class consists of two parts: + * - static functions that normalize strings or test if strings are normalized + * - a Normalizer object is an iterator that takes any kind of text and + * provides iteration over its normalized form + * + * The Normalizer class is not suitable for subclassing. + * + * The static functions are basically wrappers around the C implementation, + * using UnicodeString instead of UChar*. + * For basic information about normalization forms and details about the C API + * please see the documentation in unorm.h. + * + * The iterator API with the Normalizer constructors and the non-static functions + * uses a CharacterIterator as input. It is possible to pass a string which + * is then internally wrapped in a CharacterIterator. + * The input text is not normalized all at once, but incrementally where needed + * (providing efficient random access). + * This allows to pass in a large text but spend only a small amount of time + * normalizing a small part of that text. + * However, if the entire text is normalized, then the iterator will be + * slower than normalizing the entire text at once and iterating over the result. 
+ * A possible use of the Normalizer iterator is also to report an index into the + * original text that is close to where the normalized characters come from. + * + * Important: The iterator API was cleaned up significantly for ICU 2.0. + * The earlier implementation reported the getIndex() inconsistently, + * and previous() could not be used after setIndex(), next(), first(), and current(). + * + * Normalizer allows to start normalizing from anywhere in the input text by + * calling setIndexOnly(), first(), or last(). + * Without calling any of these, the iterator will start at the beginning of the text. + * + * At any time, next() returns the next normalized code point (UChar32), + * with post-increment semantics (like CharacterIterator::next32PostInc()). + * previous() returns the previous normalized code point (UChar32), + * with pre-decrement semantics (like CharacterIterator::previous32()). + * + * current() returns the current code point + * (respectively the one at the newly set index) without moving + * the getIndex(). Note that if the text at the current position + * needs to be normalized, then these functions will do that. + * (This is why current() is not const.) + * It is more efficient to call setIndexOnly() instead, which does not + * normalize. + * + * getIndex() always refers to the position in the input text where the normalized + * code points are returned from. It does not always change with each returned + * code point. + * The code point that is returned from any of the functions + * corresponds to text at or after getIndex(), according to the + * function's iteration semantics (post-increment or pre-decrement). + * + * next() returns a code point from at or after the getIndex() + * from before the next() call. After the next() call, the getIndex() + * might have moved to where the next code point will be returned from + * (from a next() or current() call). 
+ * This is semantically equivalent to array access with array[index++] + * (post-increment semantics). + * + * previous() returns a code point from at or after the getIndex() + * from after the previous() call. + * This is semantically equivalent to array access with array[--index] + * (pre-decrement semantics). + * + * Internally, the Normalizer iterator normalizes a small piece of text + * starting at the getIndex() and ending at a following "safe" index. + * The normalized results is stored in an internal string buffer, and + * the code points are iterated from there. + * With multiple iteration calls, this is repeated until the next piece + * of text needs to be normalized, and the getIndex() needs to be moved. + * + * The following "safe" index, the internal buffer, and the secondary + * iteration index into that buffer are not exposed on the API. + * This also means that it is currently not practical to return to + * a particular, arbitrary position in the text because one would need to + * know, and be able to set, in addition to the getIndex(), at least also the + * current index into the internal buffer. + * It is currently only possible to observe when getIndex() changes + * (with careful consideration of the iteration semantics), + * at which time the internal index will be 0. + * For example, if getIndex() is different after next() than before it, + * then the internal index is 0 and one can return to this getIndex() + * later with setIndexOnly(). + * + * @author Laura Werner, Mark Davis, Markus Scherer + * @stable ICU 2.0 + */ +class U_COMMON_API Normalizer : public UObject { +public: + /** + * If DONE is returned from an iteration function that returns a code point, + * then there are no more normalization results available. 
+ * @stable ICU 2.0 + */ + enum { + DONE=0xffff + }; + + // Constructors + + /** + * Creates a new+ * static Locale* getAvailableLocales(int32_t& numLocales) + * static UnicodeString& getDisplayName(const Locale& objectLocale, + * const Locale& displayLocale, + * UnicodeString& displayName) + * static UnicodeString& getDisplayName(const Locale& objectLocale, + * UnicodeString& displayName) + *+ * \htmlonly
Normalizer object for iterating over the
+ * normalized form of a given string.
+ *
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param mode The normalization mode.
+ * @stable ICU 2.0
+ */
+ Normalizer(const UnicodeString& str, UNormalizationMode mode);
+
+ /**
+ * Creates a new Normalizer object for iterating over the
+ * normalized form of a given UChar string.
+ *
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param length Length of str, or -1 if str is NUL-terminated.
+ * @param mode The normalization mode.
+ * @stable ICU 2.0
+ */
+ Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
+
+ /**
+ * Creates a new Normalizer object for iterating over the
+ * normalized form of the given text.
+ *
+ * @param iter The input text to be normalized. The normalization
+ * will start at the beginning of the text.
+ *
+ * @param mode The normalization mode.
+ * @stable ICU 2.0
+ */
+ Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
+
+ /**
+ * Copy constructor.
+ * @param copy The Normalizer object to be copied.
+ * @stable ICU 2.0
+ */
+ Normalizer(const Normalizer& copy);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Normalizer();
+
+
+ //-------------------------------------------------------------------------
+ // Static utility methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Normalizes a
+ * Note: If the normalization mode is changed while iterating
+ * over a string, calls to {@link #next() } and {@link #previous() } may
+ * return previously buffered characters in the old normalization mode
+ * until the iteration is able to re-sync at the next base character.
+ * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+ * {@link #setText }, {@link #first() },
+ * {@link #last() }, etc. after calling
+ * @param newMode the new mode for this
+ * @param option the option(s) that are to be checked
+ * @return TRUE if any of the option(s) are set
+ * @see #setOption
+ * @stable ICU 2.0
+ */
+ UBool getOption(int32_t option) const;
+
+ /**
+ * Set the input text over which this The line, offset, and context fields are optional; parsing
+ * engines may choose not to use them.
+ *
+ * The preContext and postContext strings include some part of the
+ * context surrounding the error. If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+ /**
+ * The line on which the error occurred. If the parser uses this
+ * field, it sets it to the line number of the source text line on
+ * which the error appears, which will be a value >= 1. If the
+ * parser does not support line numbers, the value will be <= 0.
+ * @stable ICU 2.0
+ */
+ int32_t line;
+
+ /**
+ * The character offset to the error. If the line field is >= 1,
+ * then this is the offset from the start of the line. Otherwise,
+ * this is the offset from the start of the text. If the parser
+ * does not support this field, it will have a value < 0.
+ * @stable ICU 2.0
+ */
+ int32_t offset;
+
+ /**
+ * Textual context before the error. Null-terminated, so at most
+ * U_PARSE_CONTEXT_LEN-1 UChars of text. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar preContext[U_PARSE_CONTEXT_LEN];
+
+ /**
+ * The error itself and/or textual context after the error. Null-terminated, so at most
+ * U_PARSE_CONTEXT_LEN-1 UChars of text. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/parsepos.h b/jni/EastAsianWidth/unicode/parsepos.h
new file mode 100644
index 0000000..cdf49e0
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/parsepos.h
@@ -0,0 +1,230 @@
+/*
+* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File PARSEPOS.H
+*
+* Modification History:
+*
+* Date Name Description
+* 07/09/97 helena Converted from java.
+* 07/17/98 stephen Added errorIndex support.
+* 05/11/99 stephen Cleaned up.
+*******************************************************************************
+*/
+
+#ifndef PARSEPOS_H
+#define PARSEPOS_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: ParsePosition
+ */
+/**
+ *
+ * By design, as you parse through a string with different formats,
+ * you can use the same ParsePosition.
+ *
+ * Instances of RuleBasedBreakIterator are most commonly created by the
+ * factory methods BreakIterator::createWordInstance(),
+ * BreakIterator::createLineInstance(), etc., and then used via the
+ * abstract API in class BreakIterator.
+ *
+ * See the ICU User Guide for information on Break Iterator Rules.
+ *
+ * This class is not intended to be subclassed. (Class DictionaryBasedBreakIterator
+ * is a subclass, but that relationship is effectively internal to the ICU
+ * implementation. The subclassing interface to RuleBasedBreakIterator is
+ * not part of the ICU API, and may not remain stable.)
+ *
+ * Return a CharacterIterator over the text being analyzed.
+ * The returned character iterator is owned by the break iterator, and must
+ * not be deleted by the caller. Repeated calls to this function may
+ * return the same CharacterIterator.
+ *
+ * The returned character iterator must not be used concurrently with
+ * the break iterator. If concurrent operation is needed, clone the
+ * returned character iterator first and operate on the clone.
+ *
+ * When the break iterator is operating on text supplied via a UText,
+ * this function will fail. Lacking any way to signal failures, it
+ * returns a CharacterIterator containing no text.
+ * The function getUText() provides similar functionality,
+ * is reliable, and is more efficient.
+ *
+ * Of the standard types of ICU break iterators, only word break and
+ * line break provide status values. The values are defined in
+ * the header file ubrk.h. For Word breaks, the status allows distinguishing between words
+ * that contain alphabetic letters, "words" that appear to be numbers,
+ * punctuation and spaces, words containing ideographic characters, and
+ * more. For Line Break, the status distinguishes between hard (mandatory) breaks
+ * and soft (potential) break positions.
+ *
+ *
+ * When creating custom break rules, one is free to define whatever
+ * status values may be convenient for the application.
+ *
+ * Note: this function is not thread safe. It should not have been
+ * declared const, and the const remains only for compatibility
+ * reasons. (The function is logically const, but not bit-wise const).
+ *
+ * @return the status from the break rule that determined the most recently
+ * returned break position.
+ *
+ * @see UWordBreak
+ * @stable ICU 2.2
+ */
+ virtual int32_t getRuleStatus() const;
+
+ /**
+ * Get the status (tag) values from the break rule(s) that determined the most
+ * recently returned break position.
+ *
+ * The returned status value(s) are stored into an array provided by the caller.
+ * The values are stored in sorted (ascending) order.
+ * If the capacity of the output array is insufficient to hold the data,
+ * the output will be truncated to the available length, and a
+ * U_BUFFER_OVERFLOW_ERROR will be signaled.
+ *
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+ * is the total number of status values that were available,
+ * not the reduced number that were actually returned.
+ * @see getRuleStatus
+ * @stable ICU 3.0
+ */
+ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+
+ /**
+ * Returns the class ID of this object polymorphically, i.e. through a
+ * virtual call. Overrides a pure virtual method.
+ * This method is to implement a simple version of RTTI, since not all
+ * C++ compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+ /**
+ * Returns the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ *
+ *     Base* polymorphic_pointer = createPolymorphicObject();
+ *     if (polymorphic_pointer->getDynamicClassID() ==
+ *         Derived::getStaticClassID()) ...
+ *
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /*
+ * Create a clone (copy) of this break iterator in memory provided
+ * by the caller. The idea is to increase performance by avoiding
+ * a storage allocation. Use of this function is NOT RECOMMENDED.
+ * Performance gains are minimal, and correct buffer management is
+ * tricky. Use clone() instead.
+ *
+ * @param stackBuffer The pointer to the memory into which the cloned object
+ * should be placed. If NULL, allocate heap memory
+ * for the cloned object.
+ * @param BufferSize The size of the buffer. If zero, return the required
+ * buffer size, but do not clone the object. If the
+ * size was too small (but not zero), allocate heap
+ * storage for the cloned object.
+ *
+ * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
+ * returned if the provided buffer was too small, and
+ * the clone was therefore put on the heap.
+ *
+ * @return Pointer to the clone object. This may differ from the stackBuffer
+ * address if the byte alignment of the stack buffer was not suitable
+ * or if the stackBuffer was too small to hold the clone.
+ * @stable ICU 2.0
+ */
+ virtual BreakIterator * createBufferClone(void *stackBuffer,
+ int32_t &BufferSize,
+ UErrorCode &status);
+
+
+ /**
+ * Return the binary form of compiled break rules,
+ * which can then be used to create a new break iterator at some
+ * time in the future. Creating a break iterator from pre-compiled rules
+ * is much faster than building one from the source form of the
+ * break rules.
+ *
+ * The binary data can only be used with the same version of ICU
+ * and on the same platform type (processor endian-ness).
+ *
+ * @param length Returns the length of the binary data. (Out parameter.)
+ *
+ * @return A pointer to the binary (compiled) rule data. The storage
+ * belongs to the RuleBasedBreakIterator object, not the
+ * caller, and must not be modified or deleted.
+ * @internal
+ */
+ virtual const uint8_t *getBinaryRules(uint32_t &length);
+
+
+protected:
+ //=======================================================================
+ // implementation
+ //=======================================================================
+ /**
+ * Discards caches and performs the other actions associated with a
+ * complete change in text or iteration position.
+ * @internal
+ */
+ virtual void reset(void);
+
+#if 0
+ // NOTE: everything up to the matching #endif is excluded from compilation
+ // by the "#if 0" guard, so the two declarations below are not part of the
+ // class as compiled.
+ /**
+ * Return true if the category lookup for this char
+ * indicates that it is in the set of dictionary lookup chars.
+ * This function is intended for use by dictionary based break iterators.
+ * @return true if the category lookup for this char
+ * indicates that it is in the set of dictionary lookup chars.
+ * @internal
+ */
+ virtual UBool isDictionaryChar(UChar32);
+
+ /**
+ * Get the type of the break iterator.
+ * @internal
+ */
+ virtual int32_t getBreakType() const;
+#endif
+
+ /**
+ * Set the type of the break iterator.
+ * @param type the new break type value for this iterator
+ * @internal
+ */
+ virtual void setBreakType(int32_t type);
+
+ /**
+ * Common initialization function, used by the constructors and by
+ * createBufferClone().
+ * (Also used by DictionaryBasedBreakIterator::createBufferClone().)
+ * @internal
+ */
+ void init();
+
+private:
+
+ /**
+ * This method backs the iterator back up to a "safe position" in the text.
+ * This is a position that we know, without any context, must be a break position.
+ * The various calling methods then iterate forward from this safe position to
+ * the appropriate position to return. (For more information, see the description
+ * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
+ * @param statetable state table used for moving backwards
+ * @internal
+ */
+ int32_t handlePrevious(const RBBIStateTable *statetable);
+
+ /**
+ * This method is the actual implementation of the next() method. All iteration
+ * vectors through here. This method initializes the state machine to state 1
+ * and advances through the text character by character until we reach the end
+ * of the text or the state machine transitions to state 0. We update our return
+ * value every time the state machine passes through a possible end state.
+ * @param statetable state table used for moving forwards
+ * @internal
+ */
+ int32_t handleNext(const RBBIStateTable *statetable);
+
+protected:
+
+ /**
+ * This is the function that actually implements dictionary-based
+ * breaking. Covering at least the range from startPos to endPos,
+ * it checks for dictionary characters, and if it finds them determines
+ * the appropriate object to deal with them. It may cache found breaks in
+ * fCachedBreakPositions as it goes. It may well also look at text outside
+ * the range startPos to endPos.
+ * If going forward, endPos is the normal Unicode break result, and
+ * if going in reverse, startPos is the normal Unicode break result.
+ * @param startPos The start position of a range of text
+ * @param endPos The end position of a range of text
+ * @param reverse The call is for the reverse direction
+ * @internal
+ */
+ int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
+
+private:
+
+ /**
+ * This function returns the appropriate LanguageBreakEngine for a
+ * given character c.
+ * @param c A character in the dictionary set
+ * @return the LanguageBreakEngine appropriate for c
+ * @internal
+ */
+ const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
+
+ /**
+ * NOTE(review): no description is given in this header. Judging by the name
+ * only, this presumably brings the rule status reported by getRuleStatus()
+ * up to date -- confirm against the implementation.
+ * @internal
+ */
+ void makeRuleStatusValid();
+
+};
+
+//------------------------------------------------------------------------------
+//
+// Inline Functions Definitions ...
+//
+//------------------------------------------------------------------------------
+
+/**
+ * Inequality operator: returns the logical negation of operator==()
+ * applied to the same argument.
+ */
+inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
+ return !operator==(that);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/rep.h b/jni/EastAsianWidth/unicode/rep.h
new file mode 100644
index 0000000..3fab6fa
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/rep.h
@@ -0,0 +1,259 @@
+/*
+**************************************************************************
+* Copyright (C) 1999-2005, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation. Ported from java. Modified to
+* match current UnicodeString API. Forced
+* to use name "handleReplaceBetween" because
+* of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Replaceable String
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * An implicit aspect of the For a subclass to support metadata, typical behavior of
+ * Subclasses must ensure that if the text between start and
+ * limit is equal to the replacement text, that replace has no
+ * effect. That is, any metadata
+ * should be unaffected. In addition, subclasses are encouraged to
+ * check for initial and trailing identical characters, and make a
+ * smaller replacement if possible. This will preserve as much
+ * metadata as possible.
+ * @param start the beginning index, inclusive;
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * BNF definition.
+ * More on resource bundle concepts and syntax can be found in the
+ * Users Guide.
+ *
+ *
+ * The ResourceBundle class is not suitable for subclassing.
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ResourceBundle : public UObject {
+public:
+ /**
+ * Constructor
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by The model is that the enumeration is over strings maintained by
+ * a 'service.' At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare). The iterator
+ * returns an error if this has occurred. Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use. Strings may take the form of const char*, const UChar*, or const
+ * UnicodeString*. The type you get is determined by the variant of
+ * 'next' that you call. In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types. Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL. The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset. Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero. The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync. This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed). Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined. The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor. If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. If the native service string is a UChar* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not NULL). Returns the next element as a NUL-terminated UChar*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined. The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor. If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. Returns the next element as a UnicodeString*. If there are no
+ * more elements, returns NULL. The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor. If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned. Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element. Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change. A symbol table maintains two kinds of mappings. The first is
+ * between symbolic names and their values. For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * The second kind of mapping is between character values and
+ * UnicodeMatcher objects. This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * Finally, a symbol table defines parsing behavior for symbolic
+ * names. All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF. The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @stable ICU 2.8
+ */
+class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
+public:
+
+ /**
+ * The character preceding a symbol reference name ('$', U+0024).
+ * @stable ICU 2.8
+ */
+ enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.8
+ */
+ virtual ~SymbolTable();
+
+ /**
+ * Look up the characters associated with the given symbolic name and
+ * return them. Return NULL if no such name exists. The resultant
+ * string may have length zero.
+ * @param s the symbolic name to look up
+ * @return a string containing the name's value, or NULL if
+ * there is no mapping for s.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+ /**
+ * Look up the UnicodeMatcher associated with the given character, and
+ * return it. Return NULL if not found.
+ * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+ * @return the UnicodeMatcher object represented by the given
+ * character, or NULL if there is no mapping for ch.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
+
+ /**
+ * Parse a symbol reference name from the given string, starting
+ * at the given position. If no valid symbol reference name is
+ * found, return the empty string and leave pos unchanged. That is, if the
+ * character at pos cannot start a name, or if pos is at or after
+ * text.length(), then return an empty string. This indicates an
+ * isolated SYMBOL_REF character.
+ * @param text the text to parse for the name
+ * @param pos on entry, the index of the first character to parse.
+ * This is the character following the SYMBOL_REF character. On
+ * exit, the index after the last parsed character. If the parse
+ * failed, pos is unchanged on exit.
+ * @param limit the index after the last character to be parsed.
+ * @return the parsed name, or an empty string if there is no
+ * valid symbolic name at the given position.
+ * @stable ICU 2.8
+ */
+ virtual UnicodeString parseReference(const UnicodeString& text,
+ ParsePosition& pos, int32_t limit) const = 0;
+};
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ubidi.h b/jni/EastAsianWidth/unicode/ubidi.h
new file mode 100644
index 0000000..2a487e0
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ubidi.h
@@ -0,0 +1,1999 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidi.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999jul27
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDI_H
+#define UBIDI_H
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+/**
+ *\file
+ * \brief C API: Bidi algorithm
+ *
+ *
+ *
+ * Note: Libraries that perform a bidirectional algorithm and
+ * reorder strings accordingly are sometimes called "Storage Layout Engines".
+ * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
+ * "Storage Layout Engines".
+ *
+ *
+ *
+ * The "limit" of a sequence of characters is the position just after their
+ * last character, i.e., one more than that position.
+ *
+ * Some of the API functions provide access to "runs".
+ * Such a "run" is defined as a sequence of characters
+ * that are at the same embedding level
+ * after performing the Bidi algorithm.
+ *
+ * @author Markus W. Scherer
+ * @version 1.0
+ *
+ *
+ * The basic assumptions are:
+ *
+ * It can also hold non-level values for the
+ * The related constants are not real, valid level values.
+ *
+ *
+ * Note that the value for
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).
+ *
+ * If this value is used in conjunction with reordering modes
+ *
+ *
+ * If reordering option
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).
+ *
+ * If this value is used in conjunction with reordering modes
+ *
+ *
+ * If reordering option
+ * This structure holds information about a paragraph (or multiple paragraphs)
+ * of text with Bidi-algorithm-related details, or about one line of
+ * such a paragraph.
+ * Reordering can be done on a line, or on one or more paragraphs which are
+ * then interpreted each as one single line.
+ * @stable ICU 2.0
+ */
+struct UBiDi;
+
+/**
+ * Typedef that lets the forward-declared struct UBiDi be referred to
+ * simply as UBiDi.
+ * @stable ICU 2.0
+ */
+typedef struct UBiDi UBiDi;
+
+/**
+ * Allocate a
+ * This object can be reused for as long as it is not deallocated
+ * by calling
+ *
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ *
+ *
+ * Important:
+ * A parent The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi". With Output runs should be retrieved using Calling this function with argument Note: calling this function after setting the reordering mode with
+ * The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering. With the reordering mode set to a value other than
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here. In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation. In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with Note that option This option must be set or reset before calling
+ * This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically: If this option is set in conjunction with reordering mode
+ * For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option This option will be ignored if specified together with option
+ * This option must be set or reset before calling
+ * This option nullifies option This option must be set or reset before calling
+ * This option specifies that the caller is interested in processing large
+ * text object in parts.
+ * The results of the successive calls are expected to be concatenated by the
+ * caller. Only the call for the last part will have this option bit off. When this option bit is on, When the
+ *
+ * This function takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from styled
+ * text and computes the left-right-directionality of each character.
+ *
+ * If the entire text is all of the same directionality, then
+ * the function may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * @param pBiDi A
+ * In the new line object, the indexes will range from 0 to
+ *
+ * This is used after calling
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a
+ *
+ * Important:
+ *
+ * The text pointer that was stored in
+ *
+ * @param pBiDi is the paragraph or line
+ *
+ * @param pBiDi is the paragraph
+ *
+ * Note that this function may allocate memory under some
+ * circumstances, unlike
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param pBiDi is the paragraph or line
+ *
+ *
+ * The value returned may be
+ * When the visual output is altered by using options of
+ *
+ * Note that in right-to-left runs, this mapping places
+ * modifier letters before base characters and second surrogates
+ * before first ones.
+ *
+ * @param pBiDi is the paragraph or line
+ *
+ * The value returned may be
+ * This is the inverse function to
+ * When the visual output is altered by using options of
+ *
+ * Some values in the map may be
+ * When the visual output is altered by using options of
+ *
+ * Some values in the map may be
+ * When the visual output is altered by using options of
+ *
+ * The index map will result in
+ * The index map will result in This option does not imply corresponding adjustment of the index
+ * mappings. This option does not imply corresponding adjustment of the index
+ * mappings. This has the same effect as calling Usually, the function pointer will be propagated to a If a This may be useful for assigning Bidi classes to PUA characters, or
+ * for special application needs. For instance, an application may want to
+ * handle all spaces like L or R characters (according to the base direction)
+ * when creating the visual ordering of logical lines which are part of a report
+ * organized in columns: there should not be interaction between adjacent
+ * cells.
+ *
+ * @param pBiDi is the paragraph
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ *
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ *
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ *
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation through character strings, regardless of how the
+ * character is stored. For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ *
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ *
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ *
+ * In addition to the plain C API defined in this header file, an
+ * object oriented C++ API with equivalent functionality is defined in the
+ * file brkiter.h.
+ *
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp
+ */
+
+/** The possible types of text boundaries. @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+ /** Character breaks @stable ICU 2.0 */
+ UBRK_CHARACTER = 0,
+ /** Word breaks @stable ICU 2.0 */
+ UBRK_WORD = 1,
+ /** Line breaks @stable ICU 2.0 */
+ UBRK_LINE = 2,
+ /** Sentence breaks @stable ICU 2.0 */
+ UBRK_SENTENCE = 3,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Title Case breaks
+ * The iterator created using this type locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.
+ *
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+ */
+ UBRK_TITLE = 4,
+#endif /* U_HIDE_DEPRECATED_API */
+ /**
+ * One greater than the largest type value above. The value stays 5 even
+ * when the deprecated UBRK_TITLE is hidden by U_HIDE_DEPRECATED_API.
+ */
+ UBRK_COUNT = 5
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ * @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ * Enum constants for the word break tags returned by
+ * ubrk_getRuleStatus(). A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+ /** Tag value for "words" that do not fit into any of the other categories.
+ * Includes spaces and most punctuation. */
+ UBRK_WORD_NONE = 0,
+ /** Upper bound for tags for uncategorized words. */
+ UBRK_WORD_NONE_LIMIT = 100,
+ /** Tag value for words that appear to be numbers, lower limit. */
+ UBRK_WORD_NUMBER = 100,
+ /** Tag value for words that appear to be numbers, upper limit. */
+ UBRK_WORD_NUMBER_LIMIT = 200,
+ /** Tag value for words that contain letters, excluding
+ * hiragana, katakana or ideographic characters, lower limit. */
+ UBRK_WORD_LETTER = 200,
+ /** Tag value for words containing letters, upper limit */
+ UBRK_WORD_LETTER_LIMIT = 300,
+ /** Tag value for words containing kana characters, lower limit */
+ UBRK_WORD_KANA = 300,
+ /** Tag value for words containing kana characters, upper limit */
+ UBRK_WORD_KANA_LIMIT = 400,
+ /** Tag value for words containing ideographic characters, lower limit */
+ UBRK_WORD_IDEO = 400,
+ /** Tag value for words containing ideographic characters, upper limit */
+ UBRK_WORD_IDEO_LIMIT = 500
+} UWordBreak;
+
+/**
+ * Enum constants for the line break tags returned by ubrk_getRuleStatus().
+ * A range of values is defined for each category of
+ * line break, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @stable ICU 2.8
+*/
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by ubrk_getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @stable ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+ /** Tag value for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators. */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+ */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+ /* No further sentence break tag ranges are defined. */
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions.
+ * @param text The text to be iterated over.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+ const char *locale,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+ int32_t rulesLength,
+ const UChar *text,
+ int32_t textLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Thread safe cloning operation
+ * @param bi iterator to be cloned
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+ const UBreakIterator *bi,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/**
+ * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
+ * @stable ICU 2.0
+ */
+#define U_BRK_SAFECLONE_BUFFERSIZE 512
+
+/**
+* Close a UBreakIterator.
+* Once closed, a UBreakIterator may no longer be used.
+* @param bi The break iterator to close.
+ * @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+/**
+ * Sets an existing iterator to point to a new piece of text
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+ const UChar* text,
+ int32_t textLength,
+ UErrorCode* status);
+
+
+/**
+ * Sets an existing iterator to point to a new piece of text
+ * @param bi The iterator to use
+ * @param text The text to be set.
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ * @param status The error code
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubrk_setUText(UBreakIterator* bi,
+ UText* text,
+ UErrorCode* status);
+
+
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Determine the text boundary following the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Determine the index of the first character in the text being scanned.
+ * This is not always the same as index 0 of the text.
+ * @param bi The break iterator to use.
+ * @return The character index of the first character in the text being scanned.
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Determine the index immediately beyond the last character in the text being
+ * scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately beyond the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the specified offset.
+ * The value returned is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+ * Determine the text boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful for determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specified position is a boundary position. As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_STABLE UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. For rules that do not specify a
+ * status, a default value of 0 is returned.
+ *
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. The default status value for rules
+ * that do not explicitly provide one is zero.
+ *
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ucasemap.h b/jni/EastAsianWidth/unicode/ucasemap.h
new file mode 100644
index 0000000..9f5880c
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ucasemap.h
@@ -0,0 +1,395 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucasemap.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005may06
+* created by: Markus W. Scherer
+*
+* Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ * upper-/lower-/title-casing according to the Unicode standard.
+ * Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ * Same flags as for u_foldCase(), u_strFoldCase(),
+ * u_strCaseCompare(), etc.
+ * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_STABLE UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_STABLE const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_STABLE uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @draft ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @draft ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @draft ICU 3.8
+ */
+U_DRAFT const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @draft ICU 3.8
+ */
+U_DRAFT void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @draft ICU 3.8
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @draft ICU 3.8
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-fold the characters in a UTF-8 string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @draft ICU 3.8
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ucat.h b/jni/EastAsianWidth/unicode/ucat.h
new file mode 100644
index 0000000..ad9f037
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ucat.h
@@ -0,0 +1,158 @@
+/*
+**********************************************************************
+* Copyright (c) 2003-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+#ifndef UCAT_H
+#define UCAT_H
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+
+/**
+ * \file
+ * \brief C API: Message Catalog Wrappers
+ *
+ * This C API provides look-alike functions that deliberately resemble
+ * the POSIX catopen, catclose, and catgets functions. The underlying
+ * implementation is in terms of ICU resource bundles, rather than
+ * POSIX message catalogs.
+ *
+ * The ICU resource bundles obey standard ICU inheritance policies.
+ * To facilitate this, sets and messages are flattened into one tier.
+ * This is done by creating resource bundle keys of the form
+ * <set_num>%<msg_num> where set_num is the set number and msg_num is
+ * the message number, formatted as decimal strings.
+ *
+ * Example: Consider a message catalog containing two sets:
+ *
+ * Set 1: Message 4 = "Good morning."
+ * Message 5 = "Good afternoon."
+ * Message 7 = "Good evening."
+ * Message 8 = "Good night."
+ * Set 4: Message 14 = "Please "
+ * Message 19 = "Thank you."
+ * Message 20 = "Sincerely,"
+ *
+ * The ICU resource bundle source file, assuming it is named
+ * "greet.txt", would look like this:
+ *
+ * greet
+ * {
+ * 1%4 { "Good morning." }
+ * 1%5 { "Good afternoon." }
+ * 1%7 { "Good evening." }
+ * 1%8 { "Good night." }
+ *
+ * 4%14 { "Please " }
+ * 4%19 { "Thank you." }
+ * 4%20 { "Sincerely," }
+ * }
+ *
+ * The catgets function is commonly used in combination with functions
+ * like printf and strftime. ICU components like message format can
+ * be used instead, although they use a different format syntax.
+ * There is an ICU package, icuio, that provides some of
+ * the POSIX-style formatting API.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * An ICU message catalog descriptor, analogous to nl_catd.
+ *
+ * @stable ICU 2.6
+ */
+typedef UResourceBundle* u_nl_catd;
+
+/**
+ * Open and return an ICU message catalog descriptor. The descriptor
+ * may be passed to u_catgets() to retrieve localized strings.
+ *
+ * @param name string containing the full path pointing to the
+ * directory where the resources reside followed by the package name
+ * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
+ * If NULL, ICU default data files will be used.
+ *
+ * Unlike POSIX, environment variables are not interpolated within the
+ * name.
+ *
+ * @param locale the locale for which we want to open the resource. If
+ * NULL, the default ICU locale will be used (see uloc_getDefault). If
+ * strlen(locale) == 0, the root locale will be used.
+ *
+ * @param ec input/output error code. Upon output,
+ * U_USING_FALLBACK_WARNING indicates that a fallback locale was
+ * used. For example, 'de_CH' was requested, but nothing was found
+ * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
+ * default locale data or root locale data was used; neither the
+ * requested locale nor any of its fallback locales were found.
+ *
+ * @return a message catalog descriptor that may be passed to
+ * u_catgets(). If the ec parameter indicates success, then the caller
+ * is responsible for calling u_catclose() to close the message
+ * catalog. If the ec parameter indicates failure, then NULL will be
+ * returned.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE u_nl_catd U_EXPORT2
+u_catopen(const char* name, const char* locale, UErrorCode* ec);
+
+/**
+ * Close an ICU message catalog, given its descriptor.
+ *
+ * @param catd a message catalog descriptor to be closed. May be NULL,
+ * in which case no action is taken.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+u_catclose(u_nl_catd catd);
+
+/**
+ * Retrieve a localized string from an ICU message catalog.
+ *
+ * @param catd a message catalog descriptor returned by u_catopen.
+ *
+ * @param set_num the message catalog set number. Sets need not be
+ * numbered consecutively.
+ *
+ * @param msg_num the message catalog message number within the
+ * set. Messages need not be numbered consecutively.
+ *
+ * @param s the default string. This is returned if the string
+ * specified by the set_num and msg_num is not found. It must be
+ * zero-terminated.
+ *
+ * @param len fill-in parameter to receive the length of the result.
+ * May be NULL, in which case it is ignored.
+ *
+ * @param ec input/output error code. May be U_USING_FALLBACK_WARNING
+ * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
+ * the set_num/msg_num tuple does not specify a valid message string
+ * in this catalog.
+ *
+ * @return a pointer to a zero-terminated UChar array which lives in
+ * an internal buffer area, typically a memory mapped/DLL file. The
+ * caller must NOT delete this pointer. If the call is unsuccessful
+ * for any reason, then s is returned. This includes the situation in
+ * which ec indicates a failing error code upon entry to this
+ * function.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE const UChar* U_EXPORT2
+u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
+ const UChar* s,
+ int32_t* len, UErrorCode* ec);
+
+U_CDECL_END
+
+#endif /*UCAT_H*/
+/*eof*/
diff --git a/jni/EastAsianWidth/unicode/uchar.h b/jni/EastAsianWidth/unicode/uchar.h
new file mode 100644
index 0000000..b702ec6
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uchar.h
@@ -0,0 +1,3013 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "5.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just each assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct: u_ispunct(c)
+ * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl: u_charType(c)==U_CONTROL_CHAR
+ * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/*
+ * !! Note: Several comments in this file are machine-read by the
+ * genpname tool. These comments describe the correspondence between
+ * icu enum constants and UCD entities. Do not delete them. Update
+ * these comments as needed.
+ *
+ * Any comment of the form "/ *[name]* /" (spaces added) is such
+ * a comment.
+ *
+ * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
+ * name, which must match PropertyValueAliases.txt.
+ */
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from a Unicode version older than, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are unavailable or only partially available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /* See note !!. Comments of the form "Binary property Dash",
+ "Enumerated property Script", "Double property Numeric_Value",
+ and "String property Age" are read by genpname. */
+
+ /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+ debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+ rather than UCHAR_BINARY_START. Likewise for other *_START
+ identifiers. */
+
+ /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+ Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+ UCHAR_ALPHABETIC=0,
+ /** First constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+ /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+ UCHAR_ASCII_HEX_DIGIT=1,
+ /** Binary property Bidi_Control.
+ Format controls which have specific functions
+ in the Bidi Algorithm. @stable ICU 2.1 */
+ UCHAR_BIDI_CONTROL=2,
+ /** Binary property Bidi_Mirrored.
+ Characters that may change display in RTL text.
+ Same as u_isMirrored.
+ See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+ UCHAR_BIDI_MIRRORED=3,
+ /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+ UCHAR_DASH=4,
+ /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+ Ignorable in most processing.
+ <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+ /** Binary property Deprecated (new in Unicode 3.2).
+ The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+ UCHAR_DEPRECATED=6,
+ /** Binary property Diacritic. Characters that linguistically modify
+ the meaning of another character to which they apply. @stable ICU 2.1 */
+ UCHAR_DIACRITIC=7,
+ /** Binary property Extender.
+ Extend the value or shape of a preceding alphabetic character,
+ e.g., length and iteration marks. @stable ICU 2.1 */
+ UCHAR_EXTENDER=8,
+ /** Binary property Full_Composition_Exclusion.
+ CompositionExclusions.txt+Singleton Decompositions+
+ Non-Starter Decompositions. @stable ICU 2.1 */
+ UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+ /** Binary property Grapheme_Base (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_BASE=10,
+ /** Binary property Grapheme_Extend (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_EXTEND=11,
+ /** Binary property Grapheme_Link (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+ UCHAR_GRAPHEME_LINK=12,
+ /** Binary property Hex_Digit.
+ Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+ UCHAR_HEX_DIGIT=13,
+ /** Binary property Hyphen. Dashes used to mark connections
+ between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+ UCHAR_HYPHEN=14,
+ /** Binary property ID_Continue.
+ Characters that can continue an identifier.
+ DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+ ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+ UCHAR_ID_CONTINUE=15,
+ /** Binary property ID_Start.
+ Characters that can start an identifier.
+ Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+ UCHAR_ID_START=16,
+ /** Binary property Ideographic.
+ CJKV ideographs. @stable ICU 2.1 */
+ UCHAR_IDEOGRAPHIC=17,
+ /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_BINARY_OPERATOR=18,
+ /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_TRINARY_OPERATOR=19,
+ /** Binary property Join_Control.
+ Format controls for cursive joining and ligation. @stable ICU 2.1 */
+ UCHAR_JOIN_CONTROL=20,
+ /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+ Characters that do not use logical order and
+ require special handling in most processing. @stable ICU 2.1 */
+ UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+ /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+ Ll+Other_Lowercase @stable ICU 2.1 */
+ UCHAR_LOWERCASE=22,
+ /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+ UCHAR_MATH=23,
+ /** Binary property Noncharacter_Code_Point.
+ Code points that are explicitly defined as illegal
+ for the encoding of characters. @stable ICU 2.1 */
+ UCHAR_NONCHARACTER_CODE_POINT=24,
+ /** Binary property Quotation_Mark. @stable ICU 2.1 */
+ UCHAR_QUOTATION_MARK=25,
+ /** Binary property Radical (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_RADICAL=26,
+ /** Binary property Soft_Dotted (new in Unicode 3.2).
+ Characters with a "soft dot", like i or j.
+ An accent placed on these characters causes
+ the dot to disappear. @stable ICU 2.1 */
+ UCHAR_SOFT_DOTTED=27,
+ /** Binary property Terminal_Punctuation.
+ Punctuation characters that generally mark
+ the end of textual units. @stable ICU 2.1 */
+ UCHAR_TERMINAL_PUNCTUATION=28,
+ /** Binary property Unified_Ideograph (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_UNIFIED_IDEOGRAPH=29,
+ /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+ Lu+Other_Uppercase @stable ICU 2.1 */
+ UCHAR_UPPERCASE=30,
+ /** Binary property White_Space.
+ Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+ Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+ UCHAR_WHITE_SPACE=31,
+ /** Binary property XID_Continue.
+ ID_Continue modified to allow closure under
+ normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_CONTINUE=32,
+ /** Binary property XID_Start. ID_Start modified to allow
+ closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_START=33,
+ /** Binary property Case_Sensitive. Either the source of a case
+ mapping or _in_ the target of a case mapping. Not the same as
+ the general category Cased_Letter. @stable ICU 2.6 */
+ UCHAR_CASE_SENSITIVE=34,
+ /** Binary property STerm (new in Unicode 4.0.1).
+ Sentence Terminal. Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ @stable ICU 3.0 */
+ UCHAR_S_TERM=35,
+ /** Binary property Variation_Selector (new in Unicode 4.0.1).
+ Indicates all those characters that qualify as Variation Selectors.
+ For details on the behavior of these characters,
+ see StandardizedVariants.html and 15.6 Variation Selectors.
+ @stable ICU 3.0 */
+ UCHAR_VARIATION_SELECTOR=36,
+ /** Binary property NFD_Inert.
+ ICU-specific property for characters that are inert under NFD,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+
+ There is one such property per normalization form.
+ These properties are computed as follows - an inert character is:
+ a) unassigned, or ALL of the following:
+ b) of combining class 0.
+ c) not decomposed by this normalization form.
+ AND if NFC or NFKC,
+ d) can never compose with a previous character.
+ e) can never compose with a following character.
+ f) can never change if another character is added.
+ Example: a-breve might satisfy all but f, but if you
+ add an ogonek it changes to a-ogonek + breve
+
+ See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
+ and icu/source/common/unormimp.h .
+ @stable ICU 3.0 */
+ UCHAR_NFD_INERT=37,
+ /** Binary property NFKD_Inert.
+ ICU-specific property for characters that are inert under NFKD,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @stable ICU 3.0 */
+ UCHAR_NFKD_INERT=38,
+ /** Binary property NFC_Inert.
+ ICU-specific property for characters that are inert under NFC,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @stable ICU 3.0 */
+ UCHAR_NFC_INERT=39,
+ /** Binary property NFKC_Inert.
+ ICU-specific property for characters that are inert under NFKC,
+ i.e., they do not interact with adjacent characters.
+ Used for example in normalizing transforms in incremental mode
+ to find the boundary of safely normalizable text despite possible
+ text additions.
+ @see UCHAR_NFD_INERT
+ @stable ICU 3.0 */
+ UCHAR_NFKC_INERT=40,
+ /** Binary Property Segment_Starter.
+ ICU-specific property for characters that are starters in terms of
+ Unicode normalization and combining character sequences.
+ They have ccc=0 and do not occur in non-initial position of the
+ canonical decomposition of any character
+ (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
+ ICU uses this property for segmenting a string for generating a set of
+ canonically equivalent strings, e.g. for canonical closure while
+ processing collation tailoring rules.
+ @stable ICU 3.0 */
+ UCHAR_SEGMENT_STARTER=41,
+ /** Binary property Pattern_Syntax (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_SYNTAX=42,
+ /** Binary property Pattern_White_Space (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_WHITE_SPACE=43,
+ /** Binary property alnum (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_ALNUM=44,
+ /** Binary property blank (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_BLANK=45,
+ /** Binary property graph (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_GRAPH=46,
+ /** Binary property print (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_PRINT=47,
+ /** Binary property xdigit (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_XDIGIT=48,
+ /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_LIMIT=49,
+
+ /** Enumerated property Bidi_Class.
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+ UCHAR_BIDI_CLASS=0x1000,
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_START=UCHAR_BIDI_CLASS,
+ /** Enumerated property Block.
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+ UCHAR_BLOCK=0x1001,
+ /** Enumerated property Canonical_Combining_Class.
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+ UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+ /** Enumerated property Decomposition_Type.
+ Returns UDecompositionType values. @stable ICU 2.2 */
+ UCHAR_DECOMPOSITION_TYPE=0x1003,
+ /** Enumerated property East_Asian_Width.
+ See http://www.unicode.org/reports/tr11/
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
+ UCHAR_EAST_ASIAN_WIDTH=0x1004,
+ /** Enumerated property General_Category.
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+ UCHAR_GENERAL_CATEGORY=0x1005,
+ /** Enumerated property Joining_Group.
+ Returns UJoiningGroup values. @stable ICU 2.2 */
+ UCHAR_JOINING_GROUP=0x1006,
+ /** Enumerated property Joining_Type.
+ Returns UJoiningType values. @stable ICU 2.2 */
+ UCHAR_JOINING_TYPE=0x1007,
+ /** Enumerated property Line_Break.
+ Returns ULineBreak values. @stable ICU 2.2 */
+ UCHAR_LINE_BREAK=0x1008,
+ /** Enumerated property Numeric_Type.
+ Returns UNumericType values. @stable ICU 2.2 */
+ UCHAR_NUMERIC_TYPE=0x1009,
+ /** Enumerated property Script.
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+ UCHAR_SCRIPT=0x100A,
+ /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+ Returns UHangulSyllableType values. @stable ICU 2.6 */
+ UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+ /** Enumerated property NFD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFD_QUICK_CHECK=0x100C,
+ /** Enumerated property NFKD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKD_QUICK_CHECK=0x100D,
+ /** Enumerated property NFC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFC_QUICK_CHECK=0x100E,
+ /** Enumerated property NFKC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKC_QUICK_CHECK=0x100F,
+ /** Enumerated property Lead_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the first code point
+ of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+ /** Enumerated property Trail_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the last code point
+ of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+ /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+ UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+ /** Enumerated property Sentence_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns USentenceBreak values. @stable ICU 3.4 */
+ UCHAR_SENTENCE_BREAK=0x1013,
+ /** Enumerated property Word_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UWordBreakValues values. @stable ICU 3.4 */
+ UCHAR_WORD_BREAK=0x1014,
+ /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_LIMIT=0x1015,
+
+ /** Bitmask property General_Category_Mask.
+ This is the General_Category property returned as a bit mask.
+ When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+ returns bit masks for UCharCategory values where exactly one bit is set.
+ When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+ a multi-bit mask is used for sets of categories like "Letters".
+ Mask values should be cast to uint32_t.
+ @stable ICU 2.4 */
+ UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+ /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+ /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_LIMIT=0x2001,
+
+ /** Double property Numeric_Value.
+ Corresponds to u_getNumericValue. @stable ICU 2.4 */
+ UCHAR_NUMERIC_VALUE=0x3000,
+ /** First constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+ /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_LIMIT=0x3001,
+
+ /** String property Age.
+ Corresponds to u_charAge. @stable ICU 2.4 */
+ UCHAR_AGE=0x4000,
+ /** First constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_START=UCHAR_AGE,
+ /** String property Bidi_Mirroring_Glyph.
+ Corresponds to u_charMirror. @stable ICU 2.4 */
+ UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+ /** String property Case_Folding.
+ Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+ UCHAR_CASE_FOLDING=0x4002,
+ /** String property ISO_Comment.
+ Corresponds to u_getISOComment. @stable ICU 2.4 */
+ UCHAR_ISO_COMMENT=0x4003,
+ /** String property Lowercase_Mapping.
+ Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+ UCHAR_LOWERCASE_MAPPING=0x4004,
+ /** String property Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_NAME=0x4005,
+ /** String property Simple_Case_Folding.
+ Corresponds to u_foldCase. @stable ICU 2.4 */
+ UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+ /** String property Simple_Lowercase_Mapping.
+ Corresponds to u_tolower. @stable ICU 2.4 */
+ UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+ /** String property Simple_Titlecase_Mapping.
+ Corresponds to u_totitle. @stable ICU 2.4 */
+ UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+ /** String property Simple_Uppercase_Mapping.
+ Corresponds to u_toupper. @stable ICU 2.4 */
+ UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+ /** String property Titlecase_Mapping.
+ Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+ UCHAR_TITLECASE_MAPPING=0x400A,
+ /** String property Unicode_1_Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_UNICODE_1_NAME=0x400B,
+ /** String property Uppercase_Mapping.
+ Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+ UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_LIMIT=0x400D,
+
+ /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+ UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Constants for the Unicode General_Category values of a character, as returned by u_charType.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+ /** See note !!. Comments of the form "Cn" are read by genpname. */
+
+ /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+ U_UNASSIGNED = 0,
+ /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+ U_GENERAL_OTHER_TYPES = 0,
+ /** Lu @stable ICU 2.0 */
+ U_UPPERCASE_LETTER = 1,
+ /** Ll @stable ICU 2.0 */
+ U_LOWERCASE_LETTER = 2,
+ /** Lt @stable ICU 2.0 */
+ U_TITLECASE_LETTER = 3,
+ /** Lm @stable ICU 2.0 */
+ U_MODIFIER_LETTER = 4,
+ /** Lo @stable ICU 2.0 */
+ U_OTHER_LETTER = 5,
+ /** Mn @stable ICU 2.0 */
+ U_NON_SPACING_MARK = 6,
+ /** Me @stable ICU 2.0 */
+ U_ENCLOSING_MARK = 7,
+ /** Mc @stable ICU 2.0 */
+ U_COMBINING_SPACING_MARK = 8,
+ /** Nd @stable ICU 2.0 */
+ U_DECIMAL_DIGIT_NUMBER = 9,
+ /** Nl @stable ICU 2.0 */
+ U_LETTER_NUMBER = 10,
+ /** No @stable ICU 2.0 */
+ U_OTHER_NUMBER = 11,
+ /** Zs @stable ICU 2.0 */
+ U_SPACE_SEPARATOR = 12,
+ /** Zl @stable ICU 2.0 */
+ U_LINE_SEPARATOR = 13,
+ /** Zp @stable ICU 2.0 */
+ U_PARAGRAPH_SEPARATOR = 14,
+ /** Cc @stable ICU 2.0 */
+ U_CONTROL_CHAR = 15,
+ /** Cf @stable ICU 2.0 */
+ U_FORMAT_CHAR = 16,
+ /** Co @stable ICU 2.0 */
+ U_PRIVATE_USE_CHAR = 17,
+ /** Cs @stable ICU 2.0 */
+ U_SURROGATE = 18,
+ /** Pd @stable ICU 2.0 */
+ U_DASH_PUNCTUATION = 19,
+ /** Ps @stable ICU 2.0 */
+ U_START_PUNCTUATION = 20,
+ /** Pe @stable ICU 2.0 */
+ U_END_PUNCTUATION = 21,
+ /** Pc @stable ICU 2.0 */
+ U_CONNECTOR_PUNCTUATION = 22,
+ /** Po @stable ICU 2.0 */
+ U_OTHER_PUNCTUATION = 23,
+ /** Sm @stable ICU 2.0 */
+ U_MATH_SYMBOL = 24,
+ /** Sc @stable ICU 2.0 */
+ U_CURRENCY_SYMBOL = 25,
+ /** Sk @stable ICU 2.0 */
+ U_MODIFIER_SYMBOL = 26,
+ /** So @stable ICU 2.0 */
+ U_OTHER_SYMBOL = 27,
+ /** Pi @stable ICU 2.0 */
+ U_INITIAL_PUNCTUATION = 28,
+ /** Pf @stable ICU 2.0 */
+ U_FINAL_PUNCTUATION = 29,
+ /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
+ U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters): OR of the LU, LL, LT, LM and LO masks. @stable ICU 2.1 */
+#define U_GC_L_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters): OR of the LU, LL and LT masks. @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks): OR of the MN, ME and MC masks. @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers): OR of the ND, NL and NO masks. @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators): OR of the ZS, ZL and ZP masks. @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others): OR of the CN, CC, CF, CO and CS masks. @stable ICU 2.1 */
+#define U_GC_C_MASK \
+ (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation): OR of the PD, PS, PE, PC, PO, PI and PF masks. @stable ICU 2.1 */
+#define U_GC_P_MASK \
+ (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+ U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols): OR of the SM, SC, SK and SO masks. @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * Character direction constants, numbered 0..18 with U_CHAR_DIRECTION_COUNT following as 19. The tag that starts each value's comment (L, R, EN, ...) is read by genpname, so keep those comments in that form.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+ /** See note !!. Comments of the form "EN" are read by genpname. */
+
+ /** L @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT = 0,
+ /** R @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT = 1,
+ /** EN @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER = 2,
+ /** ES @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_SEPARATOR = 3,
+ /** ET @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_TERMINATOR = 4,
+ /** AN @stable ICU 2.0 */
+ U_ARABIC_NUMBER = 5,
+ /** CS @stable ICU 2.0 */
+ U_COMMON_NUMBER_SEPARATOR = 6,
+ /** B @stable ICU 2.0 */
+ U_BLOCK_SEPARATOR = 7,
+ /** S @stable ICU 2.0 */
+ U_SEGMENT_SEPARATOR = 8,
+ /** WS @stable ICU 2.0 */
+ U_WHITE_SPACE_NEUTRAL = 9,
+ /** ON @stable ICU 2.0 */
+ U_OTHER_NEUTRAL = 10,
+ /** LRE @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_EMBEDDING = 11,
+ /** LRO @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_OVERRIDE = 12,
+ /** AL @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_ARABIC = 13,
+ /** RLE @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_EMBEDDING = 14,
+ /** RLO @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_OVERRIDE = 15,
+ /** PDF @stable ICU 2.0 */
+ U_POP_DIRECTIONAL_FORMAT = 16,
+ /** NSM @stable ICU 2.0 */
+ U_DIR_NON_SPACING_MARK = 17,
+ /** BN @stable ICU 2.0 */
+ U_BOUNDARY_NEUTRAL = 18,
+ /** @stable ICU 2.0 */
+ U_CHAR_DIRECTION_COUNT
+} UCharDirection;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt. Values are assigned explicitly (0..154); UBLOCK_COUNT is 155 and UBLOCK_INVALID_CODE is -1. The bracketed hex comment after each value looks like the block's first code point - TODO confirm against Blocks.txt.
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+
+ /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+ UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Greek and Coptic".
+ * @stable ICU 2.0
+ */
+ UBLOCK_GREEK =8, /*[0370]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CYRILLIC =9, /*[0400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARMENIAN =10, /*[0530]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HEBREW =11, /*[0590]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC =12, /*[0600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SYRIAC =13, /*[0700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAANA =14, /*[0780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BENGALI =16, /*[0980]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GUJARATI =18, /*[0A80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ORIYA =19, /*[0B00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TAMIL =20, /*[0B80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TELUGU =21, /*[0C00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANNADA =22, /*[0C80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SINHALA =24, /*[0D80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAI =25, /*[0E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LAO =26, /*[0E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TIBETAN =27, /*[0F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MYANMAR =28, /*[1000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OGHAM =34, /*[1680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_RUNIC =35, /*[16A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KHMER =36, /*[1780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+ * @stable ICU 2.0
+ */
+ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARROWS =46, /*[2190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DINGBATS =56, /*[2700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIRAGANA =62, /*[3040]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KATAKANA =63, /*[30A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANBUN =66, /*[3190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+ /**
+ * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE = 78,
+ /**
+ * Same as UBLOCK_PRIVATE_USE.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+ /* New blocks in Unicode 3.1 */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OLD_ITALIC = 88 , /*[10300]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_GOTHIC = 89 , /*[10330]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_DESERET = 90 , /*[10400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_TAGS = 96, /*[E0000]*/
+
+ /* New blocks in Unicode 3.2 */
+
+ /**
+ * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+ * @stable ICU 2.2
+ */
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = 97,
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGALOG = 98, /*[1700]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_HANUNOO = 99, /*[1720]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_BUHID = 100, /*[1740]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGBANWA = 101, /*[1760]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+ /* New blocks in Unicode 4 */
+
+ /** @stable ICU 2.6 */
+ UBLOCK_LIMBU = 111, /*[1900]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_LE = 112, /*[1950]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_UGARITIC = 120, /*[10380]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_SHAVIAN = 121, /*[10450]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_OSMANYA = 122, /*[10480]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+ /* New blocks in Unicode 4.1 */
+
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_BUGINESE = 129, /*[1A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COPTIC = 132, /*[2C80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_TIFINAGH = 144, /*[2D30]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+ /* New blocks in Unicode 5.0 */
+
+ /** @stable ICU 3.6 */
+ UBLOCK_NKO = 146, /*[07C0]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_BALINESE = 147, /*[1B00]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHAGS_PA = 150, /*[A840]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHOENICIAN = 151, /*[10900]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM = 152, /*[12000]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT = 155,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_INVALID_CODE=-1
+};
+
+/** Lets code name the block enum as plain UBlockCode, without the 'enum' keyword. @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants, numbered implicitly from 0 (U_EA_NEUTRAL) to 5 (U_EA_WIDE); U_EA_COUNT is 6.
+ * The bracketed comment after each value looks like its short name ([N], [A], ...) - TODO confirm.
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+ U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
+ U_EA_AMBIGUOUS, /*[A]*/
+ U_EA_HALFWIDTH, /*[H]*/
+ U_EA_FULLWIDTH, /*[F]*/
+ U_EA_NARROW, /*[Na]*/
+ U_EA_WIDE, /*[W]*/
+ U_EA_COUNT
+} UEastAsianWidth;
+/*
+ * Implementation note:
+ * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
+ */
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ * Values are implicit, starting at 0; U_CHAR_NAME_CHOICE_COUNT is 3.
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+ U_UNICODE_CHAR_NAME, /* 0; presumably selects the "modern" name - TODO confirm */
+ U_UNICODE_10_CHAR_NAME, /* 1; presumably selects the Unicode 1.0 name - TODO confirm */
+ U_EXTENDED_CHAR_NAME, /* 2; presumably selects the "extended" name - TODO confirm */
+ U_CHAR_NAME_CHOICE_COUNT /* 3; number of selectors listed above */
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName(). These selectors are used to choose which
+ * name is returned for a given property or value. All properties and
+ * values have a long name. Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,... (so the first additional selector, 2, equals U_PROPERTY_NAME_CHOICE_COUNT).
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+ U_SHORT_PROPERTY_NAME, /* 0 */
+ U_LONG_PROPERTY_NAME, /* 1; additional names are selected as this value + i */
+ U_PROPERTY_NAME_CHOICE_COUNT /* 2; counts only the two named selectors above */
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants, numbered implicitly from 0 (U_DT_NONE) to 17 (U_DT_WIDE); U_DT_COUNT is 18.
+ * The bracketed comment after each value looks like its short name ([can], [com], ...) - TODO confirm.
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+ U_DT_NONE, /*[none]*/ /*See note !!*/
+ U_DT_CANONICAL, /*[can]*/
+ U_DT_COMPAT, /*[com]*/
+ U_DT_CIRCLE, /*[enc]*/
+ U_DT_FINAL, /*[fin]*/
+ U_DT_FONT, /*[font]*/
+ U_DT_FRACTION, /*[fra]*/
+ U_DT_INITIAL, /*[init]*/
+ U_DT_ISOLATED, /*[iso]*/
+ U_DT_MEDIAL, /*[med]*/
+ U_DT_NARROW, /*[nar]*/
+ U_DT_NOBREAK, /*[nb]*/
+ U_DT_SMALL, /*[sml]*/
+ U_DT_SQUARE, /*[sqr]*/
+ U_DT_SUB, /*[sub]*/
+ U_DT_SUPER, /*[sup]*/
+ U_DT_VERTICAL, /*[vert]*/
+ U_DT_WIDE, /*[wide]*/
+ U_DT_COUNT /* 18 */
+} UDecompositionType;
+
+/**
+ * Joining Type constants, numbered implicitly from 0 (U_JT_NON_JOINING) to 5 (U_JT_TRANSPARENT); U_JT_COUNT is 6.
+ * The bracketed comment after each value looks like its one-letter short name - TODO confirm.
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+ U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
+ U_JT_JOIN_CAUSING, /*[C]*/
+ U_JT_DUAL_JOINING, /*[D]*/
+ U_JT_LEFT_JOINING, /*[L]*/
+ U_JT_RIGHT_JOINING, /*[R]*/
+ U_JT_TRANSPARENT, /*[T]*/
+ U_JT_COUNT /* 6 */
+} UJoiningType;
+
+/**
+ * Joining Group constants, numbered implicitly from 0 (U_JG_NO_JOINING_GROUP).
+ * The three values marked ICU 2.6 sit after the alphabetical run, directly before U_JG_COUNT.
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+ U_JG_NO_JOINING_GROUP,
+ U_JG_AIN,
+ U_JG_ALAPH,
+ U_JG_ALEF,
+ U_JG_BEH,
+ U_JG_BETH,
+ U_JG_DAL,
+ U_JG_DALATH_RISH,
+ U_JG_E,
+ U_JG_FEH,
+ U_JG_FINAL_SEMKATH,
+ U_JG_GAF,
+ U_JG_GAMAL,
+ U_JG_HAH,
+ U_JG_HAMZA_ON_HEH_GOAL,
+ U_JG_HE,
+ U_JG_HEH,
+ U_JG_HEH_GOAL,
+ U_JG_HETH,
+ U_JG_KAF,
+ U_JG_KAPH,
+ U_JG_KNOTTED_HEH,
+ U_JG_LAM,
+ U_JG_LAMADH,
+ U_JG_MEEM,
+ U_JG_MIM,
+ U_JG_NOON,
+ U_JG_NUN,
+ U_JG_PE,
+ U_JG_QAF,
+ U_JG_QAPH,
+ U_JG_REH,
+ U_JG_REVERSED_PE,
+ U_JG_SAD,
+ U_JG_SADHE,
+ U_JG_SEEN,
+ U_JG_SEMKATH,
+ U_JG_SHIN,
+ U_JG_SWASH_KAF,
+ U_JG_SYRIAC_WAW,
+ U_JG_TAH,
+ U_JG_TAW,
+ U_JG_TEH_MARBUTA,
+ U_JG_TETH,
+ U_JG_WAW,
+ U_JG_YEH,
+ U_JG_YEH_BARREE,
+ U_JG_YEH_WITH_TAIL,
+ U_JG_YUDH,
+ U_JG_YUDH_HE,
+ U_JG_ZAIN,
+ U_JG_FE, /**< @stable ICU 2.6 */
+ U_JG_KHAPH, /**< @stable ICU 2.6 */
+ U_JG_ZHAIN, /**< @stable ICU 2.6 */
+ U_JG_COUNT
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants, explicitly numbered 0..9; U_GCB_COUNT is 10.
+ * The bracketed comment after each value looks like its short name ([XX], [CN], ...) - TODO confirm.
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+ U_GCB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ U_GCB_CONTROL = 1, /*[CN]*/
+ U_GCB_CR = 2, /*[CR]*/
+ U_GCB_EXTEND = 3, /*[EX]*/
+ U_GCB_L = 4, /*[L]*/
+ U_GCB_LF = 5, /*[LF]*/
+ U_GCB_LV = 6, /*[LV]*/
+ U_GCB_LVT = 7, /*[LVT]*/
+ U_GCB_T = 8, /*[T]*/
+ U_GCB_V = 9, /*[V]*/
+ U_GCB_COUNT = 10
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants, explicitly numbered 0..7; U_WB_COUNT is 8.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags, hence the "Values" suffix here.)
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+ U_WB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ U_WB_ALETTER = 1, /*[LE]*/
+ U_WB_FORMAT = 2, /*[FO]*/
+ U_WB_KATAKANA = 3, /*[KA]*/
+ U_WB_MIDLETTER = 4, /*[ML]*/
+ U_WB_MIDNUM = 5, /*[MN]*/
+ U_WB_NUMERIC = 6, /*[NU]*/
+ U_WB_EXTENDNUMLET = 7, /*[EX]*/
+ U_WB_COUNT = 8
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants, explicitly numbered 0..10; U_SB_COUNT is 11.
+ * The bracketed comment after each value looks like its two-letter short name - TODO confirm.
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+ U_SB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ U_SB_ATERM = 1, /*[AT]*/
+ U_SB_CLOSE = 2, /*[CL]*/
+ U_SB_FORMAT = 3, /*[FO]*/
+ U_SB_LOWER = 4, /*[LO]*/
+ U_SB_NUMERIC = 5, /*[NU]*/
+ U_SB_OLETTER = 6, /*[LE]*/
+ U_SB_SEP = 7, /*[SE]*/
+ U_SB_SP = 8, /*[SP]*/
+ U_SB_STERM = 9, /*[ST]*/
+ U_SB_UPPER = 10, /*[UP]*/
+ U_SB_COUNT = 11
+} USentenceBreak;
+
+/**
+ * Line Break constants, explicitly numbered 0..35; U_LB_COUNT is 36.
+ * U_LB_INSEPARABLE is an alias with the same value (15) as the older, misspelled U_LB_INSEPERABLE.
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+ U_LB_UNKNOWN = 0, /*[XX]*/ /*See note !!*/
+ U_LB_AMBIGUOUS = 1, /*[AI]*/
+ U_LB_ALPHABETIC = 2, /*[AL]*/
+ U_LB_BREAK_BOTH = 3, /*[B2]*/
+ U_LB_BREAK_AFTER = 4, /*[BA]*/
+ U_LB_BREAK_BEFORE = 5, /*[BB]*/
+ U_LB_MANDATORY_BREAK = 6, /*[BK]*/
+ U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
+ U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+ U_LB_COMBINING_MARK = 9, /*[CM]*/
+ U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
+ U_LB_EXCLAMATION = 11, /*[EX]*/
+ U_LB_GLUE = 12, /*[GL]*/
+ U_LB_HYPHEN = 13, /*[HY]*/
+ U_LB_IDEOGRAPHIC = 14, /*[ID]*/
+ U_LB_INSEPERABLE = 15,
+ /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+ U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
+ U_LB_INFIX_NUMERIC = 16, /*[IS]*/
+ U_LB_LINE_FEED = 17, /*[LF]*/
+ U_LB_NONSTARTER = 18, /*[NS]*/
+ U_LB_NUMERIC = 19, /*[NU]*/
+ U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
+ U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
+ U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
+ U_LB_QUOTATION = 23, /*[QU]*/
+ U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
+ U_LB_SURROGATE = 25, /*[SG]*/
+ U_LB_SPACE = 26, /*[SP]*/
+ U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
+ U_LB_ZWSPACE = 28, /*[ZW]*/
+ U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+ U_LB_WORD_JOINER = 30, /*[WJ]*/
+ U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+ U_LB_H3 = 32, /*[H3]*/
+ U_LB_JL = 33, /*[JL]*/
+ U_LB_JT = 34, /*[JT]*/
+ U_LB_JV = 35, /*[JV]*/
+ U_LB_COUNT = 36
+} ULineBreak;
+
+/**
+ * Numeric Type constants, numbered implicitly from 0 (U_NT_NONE) to 3 (U_NT_NUMERIC); U_NT_COUNT is 4.
+ * The bracketed comment after each value looks like its short name ([de], [di], [nu]) - TODO confirm.
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+ U_NT_NONE, /*[None]*/ /*See note !!*/
+ U_NT_DECIMAL, /*[de]*/
+ U_NT_DIGIT, /*[di]*/
+ U_NT_NUMERIC, /*[nu]*/
+ U_NT_COUNT
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants, numbered implicitly from 0 (U_HST_NOT_APPLICABLE) to 5 (U_HST_LVT_SYLLABLE); U_HST_COUNT is 6.
+ * The bracketed comment after each value looks like its short name ([NA], [L], [V], ...) - TODO confirm.
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+ U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
+ U_HST_LEADING_JAMO, /*[L]*/
+ U_HST_VOWEL_JAMO, /*[V]*/
+ U_HST_TRAILING_JAMO, /*[T]*/
+ U_HST_LV_SYLLABLE, /*[LV]*/
+ U_HST_LVT_SYLLABLE, /*[LVT]*/
+ U_HST_COUNT
+} UHangulSyllableType;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which
+ * The
+ * If the digit is less than 10, then
+ *
+ * For ensuring the availability of necessary data, an application should
+ * open the service objects (converters, collators, etc.) that it will use
+ * and check for error codes there.
+ *
+ * Documentation for ICU 2.6 to ICU 3.4:
+ *
+ * This function loads and initializes data items
+ * that are required internally by various ICU functions. Use of this explicit
+ * initialization is required in multi-threaded applications; in
+ * single threaded apps, use is optional, but incurs little additional
+ * cost, and is thus recommended.
+ *
+ * In multi-threaded applications, u_init() should be called in the
+ * main thread before starting additional threads, or, alternatively
+ * it can be called in each individual thread once, before other ICU
+ * functions are called in that thread. In this second scenario, the
+ * application must guarantee that the first call to u_init() happen
+ * without contention, in a single thread only.
+ *
+ * If
+ * Extra, repeated, or otherwise unneeded calls to u_init() do no harm,
+ * other than taking a small amount of time.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed. Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and
+ * utrace_setFunctions(). If ICU is to be reinitialized after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ *
+ * u_cleanup() is not thread safe. All other threads should stop using ICU
+ * before calling this function.
+ *
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ *
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init(). An application must invoke u_init() first from one single
+ * thread before allowing other threads to call u_init(). All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.
+ *
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ *
+ * Use this function with great care!
+ * This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter. Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information. When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution. More information about this API can be found in our
+ * User's
+ * Guide. A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored. Options are appended to the converter name string, with a
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the User's
+ * Guide. Aliases starting with a "cp" prefix have no specific meaning
+ * other than being an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases. See ucnv_open for the complete details Creates a UConverter object specified from a packageName and a converterName. The packageName and converterName must point to an ICU udata object, as defined by
+ * The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a
+ * 'master' converter. A future version of ICU may add alias table lookups and/or caching
+ * to this function. Example Use:
+ * Handling of surrogate pairs and supplementary-plane code points:
+ * Example alias table:
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:
+ * Example alias table:
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:
+ * Usage:
+ * @code
+ * UErrorCode err = U_ZERO_ERROR;
+ * char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
+ * int32_t signatureLength = 0;
+ * const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
+ * UConverter *conv = NULL;
+ * UChar output[100];
+ * UChar *target = output, *out;
+ * char *source = input;
+ * if(encoding!=NULL && U_SUCCESS(err)){
+ * // should signature be discarded ? (discardSignature below is a caller-chosen flag; this snippet does not declare it)
+ * conv = ucnv_open(encoding, &err);
+ * // do the conversion
+ * ucnv_toUnicode(conv,
+ * target, output + sizeof(output)/U_SIZEOF_UCHAR,
+ * source, input + sizeof(input),
+ * NULL, TRUE, &err);
+ * out = output;
+ * if (discardSignature){
+ * ++out; // ignore initial U+FEFF
+ * }
+ * while(out != target) {
+ * printf("%04x ", *out++);
+ * }
+ * puts("");
+ * }
+ *
+ * @endcode
+ *
+ * @param source The source string in which the signature should be detected.
+ * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
+ * of the detected UTF. 0 if not detected.
+ * May be NULL if the caller does not need this length.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected, as a const string. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+ int32_t sourceLength,
+ int32_t *signatureLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns how many UChars are currently held in the converter's internal state
+ * because the conversion cannot be completed without more input. This is mainly
+ * useful for mapping semantics of ICU's converter interface to those of iconv;
+ * ordinary conversion code does not need this information.
+ * @param cnv The converter whose held input is counted (taken as const).
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state, or -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns how many chars are currently held in the converter's internal state
+ * because the conversion cannot be completed without more input. This is mainly
+ * useful for mapping semantics of ICU's converter interface to those of iconv;
+ * ordinary conversion code does not need this information.
+ * @param cnv The converter whose held input is counted (taken as const).
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state, or -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/jni/EastAsianWidth/unicode/ucnv_cb.h b/jni/EastAsianWidth/unicode/ucnv_cb.h
new file mode 100644
index 0000000..f0e67ba
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ucnv_cb.h
@@ -0,0 +1,162 @@
+/*
+**********************************************************************
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv_cb.h:
+ * External APIs for the ICU's codeset conversion library
+ * Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ */
+
+/**
+ * \file
+ * \brief C UConverter functions to aid the writers of callbacks
+ *
+ * This structure may grow in the future, indicated by the
+ * The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text. The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file). The data is always preceded by a header that includes
+ * a If For details about ICU data loading see the User Guide
+ * Data Management chapter. (http://icu-project.org/userguide/icudata.html)
+ * You create a
+ * The second option includes an additional ISO Country
+ * Code. These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ *
+ * The third option requires additional information--the
+ * Variant.
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ *
+ * Because a
+ * The
+ * Once you've specified a locale you can query it for information about
+ * itself. Use
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the
+ * Each international service that performs locale-sensitive operations
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly
+ * Concerning POSIX/RFC1766 Locale IDs,
+ * the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-style variant, uloc_getVariant() for example
+ * will return the one listed after the \@at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions. For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 56
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 50
+
+/**
+ * Useful constant for the maximum size of keywords and their values in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Character separating keywords from the locale string
+ * different for EBCDIC - TODO
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+/**
+ * Character for assigning value to a keyword
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+/**
+ * Character separating keywords
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Constants for *_getLocale()
+ * Allow user to select whether she wants information on
+ * requested, valid or actual locale.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0),
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified
+ * from the UCA)
+ * The locale is considered supported by ICU if there is a core ICU bundle
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+ /** This is the locale the data actually comes from
+ * @stable ICU 2.1
+ */
+ ULOC_ACTUAL_LOCALE = 0,
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
+ ULOC_VALID_LOCALE = 1,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /** This is the requested locale
+ * @deprecated ICU 2.8
+ */
+ ULOC_REQUESTED_LOCALE = 2,
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /** Upper bound of the locale type values: one greater than
+ * ULOC_REQUESTED_LOCALE (2). The value stays 3 even when the
+ * deprecated enumerator is hidden by U_HIDE_DEPRECATED_API.
+ */
+ ULOC_DATA_LOCALE_TYPE_LIMIT = 3
+} ULocDataLocaleType ;
+
+
+/**
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ *
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ *
+ * Changes to ICU's default locale do not affect any ICU services that
+ * may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ * the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+uloc_setDefault(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getLanguage(const char* localeID,
+ char* language,
+ int32_t languageCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the script code with
+ * @param script the script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * script code with
+ * @param err error information if retrieving the script code failed
+ * @return the actual buffer size needed for the script code. If it's greater
+ * than scriptCapacity, the returned script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code. If it's greater
+ * than countryCapacity, the returned country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getCountry(const char* localeID,
+ char* country,
+ int32_t countryCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code. If it's greater
+ * than variantCapacity, the returned variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getVariant(const char* localeID,
+ char* variant,
+ int32_t variantCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_canonicalize(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return the Win32 LCID for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch".
+ * @param language the displayable language code for locale
+ * @param languageCapacity the size of the language buffer to store the
+ * displayable language code with
+ * @param status error information if retrieving the displayable language code failed
+ * @return the actual buffer size needed for the displayable language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+ const char* displayLocale,
+ UChar* language,
+ int32_t languageCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "", while passing Locale::getGerman()
+ * for displayLocale would result in "". NULL may be used to specify the default.
+ * @param script the displayable script code for locale
+ * @param scriptCapacity the size of the script buffer to store the
+ * displayable script code with
+ * @param status error information if retrieving the displayable script code failed
+ * @return the actual buffer size needed for the displayable script code. If it's greater
+ * than scriptCapacity, the returned displayable script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param country the displayable country code for locale
+ * @param countryCapacity the size of the country buffer to store the
+ * displayable country code with
+ * @param status error information if retrieving the displayable country code failed
+ * @return the actual buffer size needed for the displayable country code. If it's greater
+ * than countryCapacity, the returned displayable country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param variant the displayable variant code for locale
+ * @param variantCapacity the size of the variant buffer to store the
+ * displayable variant code with
+ * @param status error information if retrieving the displayable variant code failed
+ * @return the actual buffer size needed for the displayable variant code. If it's greater
+ * than variantCapacity, the returned displayable variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ * Note: UnicodeFilter currently stubs out two pure virtual methods
+ * of its base class, UnicodeMatcher. These methods are toPattern()
+ * and matchesIndexValue(). This is done so that filter classes that
+ * are not actually used as matchers -- specifically, those in the
+ * UnicodeFilterLogic component, and those in tests -- can continue to
+ * work without defining these methods. As long as a filter is not
+ * used in an RBT during real transliteration, these methods will not
+ * be called. However, this breaks the UnicodeMatcher base class
+ * protocol, and it is not a correct solution.
+ *
+ * In the future we may revisit the UnicodeMatcher / UnicodeFilter
+ * hierarchy and either redesign it, or simply remove the stubs in
+ * UnicodeFilter and force subclasses to implement the full
+ * UnicodeMatcher protocol.
+ *
+ * @see UnicodeFilterLogic
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
+
+public:
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeFilter();
+
+ /**
+ * Returns true for characters that are in the selected
+ * subset. In other words, if a character is to be
+ * filtered, then contains() returns
+ * false.
+ * Pure virtual: every concrete filter must supply this test.
+ * @param c the character to test
+ * @return true if c is in the selected subset, false if it is
+ * to be filtered
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const = 0;
+
+ /**
+ * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
+ * and return the pointer.
+ * @return this object as a UnicodeMatcher*
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const;
+
+ /**
+ * Implement UnicodeMatcher API.
+ * The signature is that of the pure virtual UnicodeMatcher::matches();
+ * see that declaration for the meaning of the parameters and of the
+ * returned match degree.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental);
+
+ /**
+ * UnicodeFunctor API. Nothing to do.
+ * @stable ICU 2.4
+ */
+ virtual void setData(const TransliterationRuleData*);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * Pure virtual here, so it must be provided by subclasses.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const = 0;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+ /*
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * The constructor declaration below is therefore left commented out.
+ * @stable ICU 2.0
+ */
+/* UnicodeFilter();*/
+};
+
+/*inline UnicodeFilter::UnicodeFilter() {}*/
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/unifunct.h b/jni/EastAsianWidth/unicode/unifunct.h
new file mode 100644
index 0000000..3aa7b03
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/unifunct.h
@@ -0,0 +1,125 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2005, International Business Machines Corporation
+* and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 01/14/2002 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIFUNCT_H
+#define UNIFUNCT_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Functor
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeMatcher;
+class UnicodeReplacer;
+class TransliterationRuleData;
+
+/**
+ * Concrete subclasses of UnicodeFunctor should use the macro
+ * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
+ * provide definitions of getStaticClassID and getDynamicClassID.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Set the data object associated with this functor. The data
+ * object provides context for functor-to-standin mapping. This
+ * method is required when assigning a functor to a different data
+ * object. This function MAY GO AWAY later if the architecture is
+ * changed to pass data object pointers through the API.
+ * @internal ICU 2.1
+ */
+ virtual void setData(const TransliterationRuleData*) = 0;
+
+protected:
+
+ /**
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * @stable ICU 2.0
+ */
+ /*UnicodeFunctor();*/
+
+};
+
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/unimatch.h b/jni/EastAsianWidth/unicode/unimatch.h
new file mode 100644
index 0000000..0dbb14e
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/unimatch.h
@@ -0,0 +1,163 @@
+/*
+* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 07/18/01 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIMATCH_H
+#define UNIMATCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Matcher
+ */
+
+
+U_NAMESPACE_BEGIN
+
+class Replaceable;
+class UnicodeString;
+class UnicodeSet;
+
+/**
+ * Constants returned by Note: This method is not const because some classes may
+ * modify their state as the result of a match.
+ *
+ * @param text the text to be matched
+ * @param offset on input, the index into text at which to begin
+ * matching. On output, the limit of the matched text. The
+ * number of matched characters is the output value of offset
+ * minus the input value. Offset should always point to the
+ * HIGH SURROGATE (leading code unit) of a pair of surrogates,
+ * both on entry and upon return.
+ * @param limit the limit index of text to be matched. Greater
+ * than offset for a forward direction match, less than offset for
+ * a backward direction match. The last character to be
+ * considered for matching will be text.charAt(limit-1) in the
+ * forward direction or text.charAt(limit+1) in the backward
+ * direction.
+ * @param incremental if TRUE, then assume further characters may
+ * be inserted at limit and check for partial matching. Otherwise
+ * assume the text as given is complete.
+ * @return a match degree value indicating a full match, a partial
+ * match, or a mismatch. If incremental is FALSE then
+ * U_PARTIAL_MATCH should never be returned.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) = 0;
+
+ /**
+ * Returns a string representation of this matcher. If the result of
+ * calling this function is passed to the appropriate parser, it
+ * will produce another matcher that is equal to this one.
+ * @param result the string to receive the pattern. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * characters to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = FALSE) const = 0;
+
+ /**
+ * Returns TRUE if this matcher will match a character c, where c
+ * & 0xFF == v, at offset, in the forward direction (with limit >
+ * offset). This is used by RuleBasedTransliterator for
+ * indexing.
+ * @stable ICU 2.4
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const = 0;
+
+ /**
+ * Union the set of all characters that may be matched by this object
+ * into the given set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uniset.h b/jni/EastAsianWidth/unicode/uniset.h
new file mode 100644
index 0000000..5ee2811
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uniset.h
@@ -0,0 +1,1516 @@
+/*
+***************************************************************************
+* Copyright (C) 1999-2007, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+* Date Name Description
+* 10/20/99 alan Creation.
+***************************************************************************
+*/
+
+#ifndef UNICODESET_H
+#define UNICODESET_H
+
+#include "unicode/unifilt.h"
+#include "unicode/unistr.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Set
+ */
+
+U_NAMESPACE_BEGIN
+
+class BMPSet;
+class ParsePosition;
+class SymbolTable;
+class UnicodeSetStringSpan;
+class UVector;
+class RuleCharacterIterator;
+
+/**
+ * A mutable set of Unicode characters and multicharacter strings. Objects of this class
+ * represent character classes used in regular expressions.
+ * A character specifies a subset of Unicode code points. Legal
+ * code points are U+0000 to U+10FFFF, inclusive.
+ *
+ * The UnicodeSet class is not designed to be subclassed.
+ *
+ * The second API is the
+ * Pattern syntax Property patterns specify a set of characters having a certain
+ * property as defined by the Unicode standard. Both the POSIX-like
+ * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a
+ * complete list of supported property patterns, see the User's Guide
+ * for UnicodeSet at
+ *
+ * http://icu-project.org/userguide/unicodeSet.html.
+ * Actual determination of property data is defined by the underlying
+ * Unicode database as implemented by UCharacter.
+ *
+ * Patterns specify individual characters, ranges of characters, and
+ * Unicode property sets. When elements are concatenated, they
+ * specify their union. To complement a set, place a '^' immediately
+ * after the opening '['. Property patterns are inverted by modifying
+ * their delimiters; "[:^foo]" and "\\P{foo}". In any other location,
+ * '^' has no special meaning.
+ *
+ * Ranges are indicated by placing a '-' between two
+ * characters, as in "a-z". This specifies the range of all
+ * characters from the left to the right, in Unicode order. If the
+ * left character is greater than or equal to the
+ * right character it is a syntax error. If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096. Operators ('&' and '-') have equal
+ * precedence and bind left-to-right. Thus
+ * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for
+ * difference; intersection is commutative.
+ *
+ * Warning: you cannot add an empty string ("") to a UnicodeSet. Formal syntax For an overview of Unicode strings in C and C++ see the
+ * User Guide Strings chapter. In ICU, a Unicode string consists of 16-bit Unicode code units.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar.
+ * For single-character handling, a Unicode character code point is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points. Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h). UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * User Guide Strings chapter.
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * BNF definition.
+ * More on resource bundle concepts and syntax can be found in the
+ * Users Guide.
+ *
+ */
+
+/**
+ * UResourceBundle is an opaque type for handles for resource bundles in C APIs.
+ * @stable ICU 2.0
+ */
+struct UResourceBundle;
+
+/**
+ * @stable ICU 2.0
+ */
+typedef struct UResourceBundle UResourceBundle;
+
+/**
+ * Numeric constants for types of resource items.
+ * @see ures_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Resource type constant for "no resource". @stable ICU 2.6 */
+ URES_NONE=-1,
+
+ /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
+ URES_STRING=0,
+
+ /** Resource type constant for binary data. @stable ICU 2.6 */
+ URES_BINARY=1,
+
+ /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
+ URES_TABLE=2,
+
+ /**
+ * Resource type constant for aliases;
+ * internally stores a string which identifies the actual resource
+ * storing the data (can be in a different resource bundle).
+ * Resolved internally before delivering the actual resource through the API.
+ * @stable ICU 2.6
+ */
+ URES_ALIAS=3,
+
+#ifndef U_HIDE_INTERNAL_API
+
+ /**
+ * Internal use only.
+ * Alternative resource type constant for tables of key-value pairs.
+ * Never returned by ures_getType().
+ * @internal
+ */
+ URES_TABLE32=4,
+
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Resource type constant for a single 28-bit integer, interpreted as
+ * signed or unsigned by the ures_getInt() or ures_getUInt() function.
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.6
+ */
+ URES_INT=7,
+
+ /** Resource type constant for arrays of resources. @stable ICU 2.6 */
+ URES_ARRAY=8,
+
+ /**
+ * Resource type constant for vectors of 32-bit integers.
+ * @see ures_getIntVector
+ * @stable ICU 2.6
+ */
+ URES_INT_VECTOR = 14,
+#ifndef U_HIDE_DEPRECATED_API
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_NONE=URES_NONE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_STRING=URES_STRING,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_BINARY=URES_BINARY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_TABLE=URES_TABLE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ALIAS=URES_ALIAS,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT=URES_INT,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ARRAY=URES_ARRAY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT_VECTOR=URES_INT_VECTOR,
+ /** @deprecated ICU 2.6 Not used. */
+ RES_RESERVED=15,
+#endif /* U_HIDE_DEPRECATED_API */
+
+ URES_LIMIT = 16 /**< NOTE(review): greater than every resource type value listed above; presumably an exclusive upper bound for UResType -- confirm. */
+} UResType;
+
+/*
+ * Functions to create and destroy resource bundles.
+ */
+
+/**
+ * Opens a UResourceBundle, from which users can extract strings by using
+ * their corresponding keys.
+ * Note that the caller is responsible for calling ures_close on each successfully
+ * opened resource bundle.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
+ * a fast lookup with no access of the locale data.
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) of the UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Gets a script name associated with the given script code.
+ * Returns "Malayalam" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script long name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Gets a script name associated with the given script code.
+ * Returns "Mlym" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script abbreviated name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode);
+
+/**
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE UScriptCode U_EXPORT2
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+#endif
+
+
diff --git a/jni/EastAsianWidth/unicode/uset.h b/jni/EastAsianWidth/unicode/uset.h
new file mode 100644
index 0000000..8232ff9
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uset.h
@@ -0,0 +1,1055 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * This is a C wrapper around the C++ UnicodeSet class. To iterate over code points and strings, use a loop like this:
+ * Each item in the set is accessed as a string. Set elements
+ * consisting of single code points are returned as strings containing
+ * just the one code point.
+ *
+ * To iterate over code point ranges, instead of individual code points,
+ * use a loop like this:
+ *
+ * If isString() == TRUE, the value is a
+ * string, otherwise the value is a
+ * single code point. Elements of either type can be retrieved
+ * with the function getString(), while elements
+ * consisting of a single code point can be retrieved with
+ * getCodepoint()
+ *
+ * The order of iteration is all code points in sorted order,
+ * followed by all strings in sorted order. Do not mix
+ * calls to next() and nextRange() without
+ * calling reset() between them. The results of doing so
+ * are undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool next();
+
+ /**
+ * Returns the next element in the set, either a code point range
+ * or a string. If there are no more elements in the set, return
+ * false. If isString() == TRUE, the value is a
+ * string and can be accessed with getString(). Otherwise the value is a
+ * range of one or more code points from getCodepoint() to
+ * getCodepointEnd() inclusive.
+ *
+ * The order of iteration is all code point ranges in sorted
+ * order, followed by all strings in sorted order. Ranges are
+ * disjoint and non-contiguous. The value returned from getString()
+ * is undefined unless isString() == TRUE. Do not mix calls to
+ * next() and nextRange() without calling
+ * reset() between them. The results of doing so are
+ * undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool nextRange();
+
+ /**
+ * Sets this iterator to visit the elements of the given set and
+ * resets it to the start of that set. The iterator is valid only
+ * so long as set is valid.
+ * @param set the set to iterate over.
+ * @stable ICU 2.4
+ */
+ void reset(const UnicodeSet& set);
+
+ /**
+ * Resets this iterator to the start of the set.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.4
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ // ======================= PRIVATES ===========================
+
+ protected:
+
+ // endElement and nextElement are really UChar32's, but we keep
+ // them as signed int32_t's so we can do comparisons with
+ // endElement set to -1. Leave them as int32_t's.
+ /** The set
+ * @stable ICU 2.4
+ */
+ const UnicodeSet* set;
+ /** End range
+ * @stable ICU 2.4
+ */
+ int32_t endRange;
+ /** Range
+ * @stable ICU 2.4
+ */
+ int32_t range;
+ /** End element
+ * @stable ICU 2.4
+ */
+ int32_t endElement;
+ /** Next element
+ * @stable ICU 2.4
+ */
+ int32_t nextElement;
+ //UBool abbreviated;
+ /** Next string
+ * @stable ICU 2.4
+ */
+ int32_t nextString;
+ /** String count
+ * @stable ICU 2.4
+ */
+ int32_t stringCount;
+
+ /**
+ * Points to the string to use when the caller asks for a
+ * string and the current iteration item is a code point, not a string.
+ * @internal
+ */
+ UnicodeString *cpString;
+
+ /** Copy constructor. Disallowed.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator(const UnicodeSetIterator&); // disallow
+
+ /** Assignment operator. Disallowed.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
+
+ /** Load range
+ * @stable ICU 2.4
+ */
+ virtual void loadRange(int32_t range);
+
+};
+
+inline UBool UnicodeSetIterator::isString() const {
+ return static_cast<UChar32>(IS_STRING) == codepoint; // true when codepoint holds the IS_STRING marker
+}
+
+inline UChar32 UnicodeSetIterator::getCodepoint() const {
+ return this->codepoint; // plain accessor for the codepoint member
+}
+
+inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
+ return this->codepointEnd; // plain accessor for the codepointEnd member
+}
+
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ushape.h b/jni/EastAsianWidth/unicode/ushape.h
new file mode 100644
index 0000000..f165e14
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ushape.h
@@ -0,0 +1,263 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ushape.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jun29
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USHAPE_H__
+#define __USHAPE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Arabic shaping
+ *
+ */
+
+/**
+ * Shape Arabic text on a character basis.
+ *
+ * This function performs basic operations for "shaping" Arabic text. It is most
+ * useful for use with legacy data formats and legacy display technology
+ * (simple terminals). All operations are performed on Unicode characters. Text-based shaping means that some character code points in the text are
+ * replaced by others depending on the context. It transforms one kind of text
+ * into another. In comparison, modern displays for Arabic text select
+ * appropriate, context-dependent font glyphs for each text element, which means
+ * that they transform text into a glyph vector. Text transformations are necessary when modern display technology is not
+ * available or when text needs to be transformed to or from legacy formats that
+ * use "shaped" characters. Since the Arabic script is cursive, connecting
+ * adjacent letters to each other, computers select images for each letter based
+ * on the surrounding letters. This usually results in four images per Arabic
+ * letter: initial, middle, final, and isolated forms. In Unicode, on the other
+ * hand, letters are normally stored abstract, and a display system is expected
+ * to select the necessary glyphs. (This makes searching and other text
+ * processing easier because the same letter has only one code.) It is possible
+ * to mimic this with text transformations because there are characters in
+ * Unicode that are rendered as letters with a specific shape
+ * (or cursive connectivity). They were included for interoperability with
+ * legacy systems and codepages, and for unsophisticated display systems. A second kind of text transformations is supported for Arabic digits:
+ * For compatibility with legacy codepages that only include European digits,
+ * it is possible to replace one set of digits by another, changing the
+ * character code points. These operations can be performed for either
+ * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
+ * digits (U+06f0...U+06f9). Some replacements may result in more or fewer characters (code points).
+ * By default, this means that the destination buffer may receive text with a
+ * length different from the source length. Some legacy systems rely on the
+ * length of the text to be constant. They expect extra spaces to be added
+ * or consumed either next to the affected character or at the end of the
+ * text. For details about the available operations, see the description of the
+ *
+ * If the UText was originally allocated by one of the utext_open functions,
+ * the storage associated with the utext will also be freed.
+ * If the UText storage originated with the application, as it would with
+ * a local or static instance, the storage will not be deleted.
+ *
+ * An open UText can be reset to refer to new string by using one of the utext_open()
+ * functions without first closing the UText.
+ *
+ * @param ut The UText to be closed.
+ * @return NULL if the UText struct was deleted by the close. If the UText struct
+ * was originally provided by the caller to the open function, it is
+ * returned by this function, and may be safely used again in
+ * a subsequent utext_open.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_close(UText *ut);
+
+
+/**
+ * Open a read-only UText implementation for UTF-8 strings.
+ *
+ * \htmlonly
+ * Any invalid UTF-8 in the input will be handled in this way:
+ * a sequence of bytes that has the form of a truncated, but otherwise valid,
+ * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
+ * Any other illegal bytes will each be replaced by a \uFFFD.
+ * \endhtmlonly
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UTF-8 string.
+ * @param s A UTF-8 string. Must not be NULL.
+ * @param length The length of the UTF-8 string in bytes, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
+
+
+/**
+ * Open a read-only UText for UChar * string.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UChar string.
+ * @param s A UChar (UTF-16) string
+ * @param length The number of UChars in the input string, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
+
+
+#ifdef XP_CPLUSPLUS
+/**
+ * Open a writable UText for a non-const UnicodeString.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A UnicodeString.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a UText for a const UnicodeString. The resulting UText will not be writable.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A const UnicodeString to be wrapped.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a writable UText implementation for an ICU Replaceable object.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified replaceable text.
+ * @param rep A Replaceable text object.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
+
+/**
+ * Open a UText implementation over an ICU CharacterIterator.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified character iterator.
+ * @param ic A Character Iterator.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see CharacterIterator
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
+
+#endif
+
+
+/**
+ * Clone a UText. This is much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * The standard UText implementations for UTF8, UChar *, UnicodeString and
+ * Replaceable all support deep cloning.
+ *
+ * The UText returned from a deep clone will be writable, assuming that the text
+ * provider is able to support writing, even if the source UText had been made
+ * non-writable by means of utext_freeze().
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation will not fail, barring truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * Shallow UText clones should be avoided if the UText functions that modify the
+ * text are expected to be used, either on the original or the cloned UText.
+ * Any such modifications can cause unpredictable behavior. Read Only
+ * shallow clones provide some protection against errors of this type by
+ * disabling text modification via the cloned UText.
+ *
+ * A shallow clone made with the readOnly parameter == FALSE will preserve the
+ * utext_isWritable() state of the source object. Note, however, that
+ * write operations must be avoided while more than one UText exists that refer
+ * to the same underlying text.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for read access only with shallow clones, and for both read and
+ * write access with deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to become the clone.
+ * @param src The UText to be cloned.
+ * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
+ * @param readOnly TRUE to request that the cloned UText have read only access to the
+ * underlying text.
+
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * will be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
+
+
+/**
+ * Compare two UText objects for equality.
+ * UTexts are equal if they are iterating over the same text, and
+ * have the same iteration position within the text.
+ * If either or both of the parameters are NULL, the comparison is FALSE.
+ *
+ * @param a The first of the two UTexts to compare.
+ * @param b The other UText to be compared.
+ * @return TRUE if the two UTexts are equal.
+ * @stable ICU 3.6
+ */
+U_STABLE UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b);
+
+
+/*****************************************************************************
+ *
+ * Functions to work with the text represented by a UText wrapper
+ *
+ *****************************************************************************/
+
+/**
+ * Get the length of the text. Depending on the characteristics
+ * of the underlying text representation, this may be expensive.
+ * @see utext_isLengthExpensive()
+ *
+ *
+ * @param ut the text to be accessed.
+ * @return the length of the text, expressed in native units.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int64_t U_EXPORT2
+utext_nativeLength(UText *ut);
+
+/**
+ * Return TRUE if calculating the length of the text could be expensive.
+ * Finding the length of NUL terminated strings is considered to be expensive.
+ *
+ * Note that the value of this function may change
+ * as the result of other operations on a UText.
+ * Once the length of a string has been discovered, it will no longer
+ * be expensive to report it.
+ *
+ * @param ut the text to be accessed.
+ * @return TRUE if determining the length of the text could be time consuming.
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut);
+
+/**
+ * Returns the code point at the requested index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * If the specified index points to the interior of a multi-unit
+ * character - one of the trail bytes of a UTF-8 sequence, for example -
+ * the complete code point will be returned.
+ *
+ * The iteration position will be set to the start of the returned code point.
+ *
+ * This function is roughly equivalent to the sequence
+ * utext_setNativeIndex(index);
+ * utext_current32();
+ * (There is a subtle difference if the index is out of bounds by being less than zero -
+ * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
+ * will return the char at zero. utext_char32At(negative index), on the other hand, will
+ * return the U_SENTINEL value of -1.)
+ *
+ * @param ut the text to be accessed
+ * @param nativeIndex the native index of the character to be accessed. If the index points
+ * to other than the first unit of a multi-unit character, it will be adjusted
+ * to the start of the character.
+ * @return the code point at the specified index.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex);
+
+
+/**
+ *
+ * Get the code point at the current iteration position,
+ * or U_SENTINEL (-1) if the iteration has reached the end of
+ * the input text.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the current iterator position.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_current32(UText *ut);
+
+
+/**
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ *
+ * If the position is at the end of the text (the index following
+ * the last character, which is also the length of the text),
+ * return U_SENTINEL (-1) and do not advance the index.
+ *
+ * This is a post-increment operation.
+ *
+ * An inline macro version of this function, UTEXT_NEXT32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the iteration position.
+ * @see UTEXT_NEXT32
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_next32(UText *ut);
+
+
+/**
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ *
+ * If the initial position is at the start of the text (index of 0)
+ * return U_SENTINEL (-1), and leave the position unchanged.
+ *
+ * An inline macro version of this function, UTEXT_PREVIOUS32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the previous UChar32 code point, or U_SENTINEL (-1)
+ * if the iteration has reached the start of the text.
+ * @see UTEXT_PREVIOUS32
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32(UText *ut);
+
+
+/**
+ * Set the iteration index and return the code point at that index.
+ * Leave the iteration index at the start of the following code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a forward iteration. The results are identical to those
+ * from the sequence
+ * \code
+ * utext_setNativeIndex();
+ * utext_next32();
+ * \endcode
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index, in the native units of the text provider.
+ * @return Code point which starts at or before index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t nativeIndex);
+
+
+
+/**
+ * Set the iteration index, and return the code point preceding the
+ * one specified by the initial index. Leave the iteration position
+ * at the start of the returned code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a backwards iteration.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index in the native units of the text provider.
+ * @return Code point preceding the one at the initial index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t nativeIndex);
+
+/**
+ * Get the current iterator position, which can range from 0 to the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ * An inline macro version of this function, UTEXT_GETNATIVEINDEX(),
+ * is available for performance critical use.
+ * @param ut the text to be accessed.
+ * @return the current index position, in the native units of the text provider.
+ * @stable ICU 3.4
+ */
+U_STABLE int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut);
+
+/**
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ *
+ * It will usually be more efficient to begin an iteration
+ * using the functions utext_next32From() or utext_previous32From()
+ * rather than utext_setNativeIndex().
+ *
+ * Moving the index position to an adjacent character is best done
+ * with utext_next32(), utext_previous32() or utext_moveIndex32().
+ * Attempting to do direct arithmetic on the index position is
+ * complicated by the fact that the size (in native units) of a
+ * character depends on the underlying representation of the character
+ * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
+ * easily knowable.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex the native unit index of the new iteration position.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t nativeIndex);
+
+/**
+ * Extract text from a UText into a UChar (UTF-16) buffer.
+ * The range of text to be extracted is given by nativeStart and nativeLimit,
+ * which are in the native units of the text provider and are not
+ * necessarily UTF-16 indexes.
+ *
+ * The size (number of 16 bit UChars) of the data to be extracted is returned. The
+ * full number of UChars is returned, even when the extracted text is truncated
+ * because the specified buffer size is too small.
+ *
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer. This
+ * terminating NUL is not included in the returned length.
+ *
+ * The iteration index is left at the position following the last extracted character.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract. If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * nativeLimit must be >= nativeStart.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
+ * buffer was too small. Returns number of UChars for preflighting.
+ * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_extract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+
+/************************************************************************************
+ *
+ * #define inline versions of selected performance-critical text access functions
+ * Caution: do not use auto increment++ or decrement-- expressions
+ * as parameters to these macros.
+ *
+ * For most use, where there is no extreme performance constraint, the
+ * normal, non-inline functions are a better choice. The resulting code
+ * will be smaller, and, if the need ever arises, easier to debug.
+ *
+ * These are implemented as #defines rather than real functions
+ * because there is no fully portable way to do inline functions in plain C.
+ *
+ ************************************************************************************/
+
+/**
+ * inline version of utext_next32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ * This is a post-increment operation.
+ * Returns U_SENTINEL (-1) if the position is at the end of the text.
+ * Only a UChar below 0xd800 inside the current chunk is handled inline;
+ * every other case calls utext_next32(). ut is evaluated more than once.
+ * @stable ICU 3.4
+ */
+#define UTEXT_NEXT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
+
+/**
+ * inline version of utext_previous32(), for performance-critical situations.
+ *
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ * Returns U_SENTINEL (-1) if the position is at the start of the text.
+ * Calls utext_previous32() unless the preceding UChar is in the chunk and below 0xd800.
+ * @stable ICU 3.4
+ */
+#define UTEXT_PREVIOUS32(ut) \
+ ((ut)->chunkOffset > 0 && \
+ (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
+ (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
+
+/**
+ * inline version of utext_getNativeIndex(), for performance-critical situations.
+ *
+ * Get the current iterator position, which can range from 0 to the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ * Up to nativeIndexingLimit the result is chunkNativeStart+chunkOffset;
+ * beyond it the provider's mapOffsetToNative() function is called.
+ * @stable ICU 3.6
+ */
+#define UTEXT_GETNATIVEINDEX(ut) \
+ ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
+ (ut)->chunkNativeStart+(ut)->chunkOffset : \
+ (ut)->pFuncs->mapOffsetToNative(ut))
+
+/**
+ * inline version of utext_setNativeIndex(), for performance-critical situations.
+ *
+ * Set the current iteration position to the nearest code point boundary at or
+ * preceding the specified index, which is in the native units of the input text.
+ * An out-of-range index is pinned to be within the range of the input text.
+ * The inline path is taken only when the index lies inside the natively indexed
+ * part of the current chunk and is not on a trail surrogate; every other case
+ * is delegated to utext_setNativeIndex(). ix is evaluated more than once.
+ * @draft ICU 3.8
+ */
+#define UTEXT_SETNATIVEINDEX(ut, ix) \
+ { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else { \
+ utext_setNativeIndex((ut), (ix)); } }
+
+
+
+/************************************************************************************
+ *
+ * Functions related to writing or modifying the text.
+ * These will work only with modifiable UTexts. Attempting to
+ * modify a read-only UText will return an error status.
+ *
+ ************************************************************************************/
+
+
+/**
+ * Return TRUE if the text can be written (modified) with utext_replace() or
+ * utext_copy(). For the text to be writable, the text provider must
+ * be of a type that supports writing and the UText must not be frozen.
+ *
+ * Attempting to modify text when utext_isWritable() is FALSE will fail -
+ * the text will not be modified, and an error will be returned from the function
+ * that attempted the modification.
+ *
+ * @param ut the UText to be tested.
+ * @return TRUE if the text is modifiable.
+ *
+ * @see utext_freeze()
+ * @see utext_replace()
+ * @see utext_copy()
+ * @stable ICU 3.4
+ *
+ */
+U_STABLE UBool U_EXPORT2
+utext_isWritable(const UText *ut);
+
+
+/**
+ * Test whether there is meta data associated with the text.
+ *
+ * @param ut The UText to be tested.
+ * @return TRUE if the underlying text includes meta data.
+ * @see Replaceable::hasMetaData()
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_hasMetaData(const UText *ut);
+
+
+/**
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a replace operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the native index of the start of the region to be replaced
+ * @param nativeLimit the native index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text (UChars)
+ * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_replace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacementLength,
+ UErrorCode *status);
+
+
+
+/**
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * The iteration position is left following the newly inserted text
+ * at the destination position.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a copy operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The native index of the start of the region to be copied or moved
+ * @param nativeLimit The native index of the character position following the region
+ * to be copied or moved.
+ * @param destIndex The native destination index to which the source substring is
+ * copied or moved.
+ * @param move If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_copy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t destIndex,
+ UBool move,
+ UErrorCode *status);
+
+
+/**
+ * Function type declaration for UText.extract().
+ *
+ * Extract text from a UText into a UChar (UTF-16) buffer. The range of
+ * text to be extracted is given by nativeStart and nativeLimit, which are
+ * in the native units of the text provider and are not necessarily
+ * UTF-16 indexes.
+ *
+ * The size (number of UChars) of the data to be extracted is returned.
+ * When status is U_BUFFER_OVERFLOW_ERROR the returned value is the number
+ * of UChars needed, so that the caller can size the buffer (preflighting);
+ * destCapacity may be zero for that purpose.
+ * The returned length does not include a trailing NUL.
+ *
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
+ * preflighting.
+ * @return Number of UChars in the data. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextExtract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.replace().
+ *
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function need only be implemented on UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the native index of the start of the region to be replaced
+ * @param nativeLimit the native index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextReplace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacmentLength,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.copy().
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at nativeDest;
+ * it does not replace or overwrite any existing text.
+ *
+ * This function need only be implemented for UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the copy or move operation.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The native index of the start of the region to be copied or moved
+ * @param nativeLimit The native index of the character following the region to be copied or moved.
+ * @param nativeDest The destination index to which the source substring is copied or moved.
+ * @param move If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextCopy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t nativeDest,
+ UBool move,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.mapOffsetToNative().
+ * Map from the current UChar offset within the current text chunk to
+ * the corresponding native index in the original source text.
+ *
+ * This is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut the UText.
+ * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
+ * The returned native index should always be on a code point boundary.
+ *
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextMapOffsetToNative(const UText *ut);
+
+/**
+ * Function type declaration for UText.mapNativeIndexToUTF16().
+ * Map from a native index to a UChar offset within a text chunk.
+ * Behavior is undefined if the native index does not fall within the
+ * current chunk.
+ *
+ * This function is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut The UText containing the text chunk.
+ * @param nativeIndex Absolute (native) text index, chunkNativeStart<=index<=chunkNativeLimit.
+ * @return Chunk-relative UTF-16 offset corresponding to the specified native
+ * index.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
+
+
+/**
+ * Function type declaration for UText.close().
+ *
+ * A Text Provider close function is only required for provider types that make
+ * allocations in their open function (or other functions) that must be
+ * cleaned up when the UText is closed.
+ *
+ * The allocation of the UText struct itself and any "extra" storage
+ * associated with the UText is handled by the common UText implementation
+ * and does not require provider specific cleanup in a close function.
+ *
+ * Most UText provider implementations do not need to implement this function.
+ *
+ * @param ut A UText object to be closed.
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextClose(UText *ut);
+
+
+/**
+ * (public) Function dispatch table for UText.
+ * Conceptually very much like a C++ Virtual Function Table.
+ * This struct defines the organization of the table.
+ * Each text provider implementation must provide an
+ * actual table that is initialized with the appropriate functions
+ * for the type of text being handled.
+ * @stable ICU 3.6
+ */
+struct UTextFuncs {
+ /**
+ * (public) Function table size, sizeof(UTextFuncs)
+ * Intended for use should the table grow to accommodate added
+ * functions in the future, to allow tests for older format
+ * function tables that do not contain the extensions.
+ *
+ * Fields are placed for optimal alignment on
+ * 32/64/128-bit-pointer machines, by normally grouping together
+ * 4 32-bit fields,
+ * 4 pointers,
+ * 2 64-bit fields
+ * in sequence.
+ * @stable ICU 3.6
+ */
+ int32_t tableSize;
+
+ /**
+ * (private) Alignment padding.
+ * Do not use, reserved for use by the UText framework only.
+ * @internal
+ */
+ int32_t reserved1, reserved2, reserved3;
+
+
+ /**
+ * (public) Function pointer for UTextClone
+ *
+ * @see UTextClone
+ * @stable ICU 3.6
+ */
+ UTextClone *clone;
+
+ /**
+ * (public) Function pointer for UTextNativeLength.
+ * May be expensive to compute!
+ *
+ * @see UTextNativeLength
+ * @stable ICU 3.6
+ */
+ UTextNativeLength *nativeLength;
+
+ /**
+ * (public) Function pointer for UTextAccess.
+ *
+ * @see UTextAccess
+ * @stable ICU 3.6
+ */
+ UTextAccess *access;
+
+ /**
+ * (public) Function pointer for UTextExtract.
+ *
+ * @see UTextExtract
+ * @stable ICU 3.6
+ */
+ UTextExtract *extract;
+
+ /**
+ * (public) Function pointer for UTextReplace.
+ *
+ * @see UTextReplace
+ * @stable ICU 3.6
+ */
+ UTextReplace *replace;
+
+ /**
+ * (public) Function pointer for UTextCopy.
+ *
+ * @see UTextCopy
+ * @stable ICU 3.6
+ */
+ UTextCopy *copy;
+
+ /**
+ * (public) Function pointer for UTextMapOffsetToNative.
+ *
+ * @see UTextMapOffsetToNative
+ * @stable ICU 3.6
+ */
+ UTextMapOffsetToNative *mapOffsetToNative;
+
+ /**
+ * (public) Function pointer for UTextMapNativeIndexToUTF16.
+ *
+ * @see UTextMapNativeIndexToUTF16
+ * @stable ICU 3.6
+ */
+ UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
+
+ /**
+ * (public) Function pointer for UTextClose.
+ *
+ * @see UTextClose
+ * @stable ICU 3.6
+ */
+ UTextClose *close;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+
+ UTextClose *spare1;
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare2;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare3;
+
+};
+typedef struct UTextFuncs UTextFuncs;
+
+ /**
+ * UText struct. Provides the interface between the generic UText access code
+ * and the UText provider code that works on specific kinds of
+ * text (UTF-8, noncontiguous UTF-16, whatever.)
+ *
+ * Applications that are using predefined types of text providers
+ * to pass text data to ICU services will have no need to view the
+ * internals of the UText structs that they open.
+ *
+ * @stable ICU 3.6
+ */
+struct UText {
+ /**
+ * (private) Magic. Used to help detect when UText functions are handed
+ * invalid or uninitialized UText structs.
+ * utext_openXYZ() functions take an initialized,
+ * but not necessarily open, UText struct as an
+ * optional fill-in parameter. This magic field
+ * is used to check for that initialization.
+ * Text provider close functions must NOT clear
+ * the magic field because that would prevent
+ * reuse of the UText struct.
+ * @internal
+ */
+ uint32_t magic;
+
+
+ /**
+ * (private) Flags for managing the allocation and freeing of
+ * memory associated with this UText.
+ * @internal
+ */
+ int32_t flags;
+
+
+ /**
+ * Text provider properties. This set of flags is maintained by the
+ * text provider implementation.
+ * @stable ICU 3.4
+ */
+ int32_t providerProperties;
+
+ /**
+ * (public) sizeOfStruct=sizeof(UText)
+ * Allows possible backward compatible extension.
+ *
+ * @stable ICU 3.4
+ */
+ int32_t sizeOfStruct;
+
+ /* ------ 16 byte alignment boundary ----------- */
+
+
+ /**
+ * (protected) Native index of the first character position following
+ * the current chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeLimit;
+
+ /**
+ * (protected) Size in bytes of the extra space (pExtra).
+ * @stable ICU 3.4
+ */
+ int32_t extraSize;
+
+ /**
+ * (protected) The highest chunk offset where native indexing and
+ * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
+ * will be equal to chunkLength.
+ *
+ * @stable ICU 3.6
+ */
+ int32_t nativeIndexingLimit;
+
+ /* ---- 16 byte alignment boundary------ */
+
+ /**
+ * (protected) Native index of the first character in the text chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeStart;
+
+ /**
+ * (protected) Current iteration position within the text chunk (UTF-16 buffer).
+ * This is the index to the character that will be returned by utext_next32().
+ * @stable ICU 3.6
+ */
+ int32_t chunkOffset;
+
+ /**
+ * (protected) Length of the text chunk (UTF-16 buffer), in UChars.
+ * @stable ICU 3.6
+ */
+ int32_t chunkLength;
+
+ /* ---- 16 byte alignment boundary-- */
+
+
+ /**
+ * (protected) pointer to a chunk of text in UTF-16 format.
+ * May refer either to original storage of the source of the text, or
+ * if conversion was required, to a buffer owned by the UText.
+ * @stable ICU 3.6
+ */
+ const UChar *chunkContents;
+
+ /**
+ * (public) Pointer to Dispatch table for accessing functions for this UText.
+ * @stable ICU 3.6
+ */
+ UTextFuncs *pFuncs;
+
+ /**
+ * (protected) Pointer to additional space requested by the
+ * text provider during the utext_open operation.
+ * @stable ICU 3.4
+ */
+ void *pExtra;
+
+ /**
+ * (protected) Pointer to string or text-containing object or similar.
+ * This is the source of the text that this UText is wrapping, in a format
+ * that is known to the text provider functions.
+ * @stable ICU 3.4
+ */
+ const void *context;
+
+ /* --- 16 byte alignment boundary--- */
+
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *p;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *q;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *r;
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ void *privP;
+
+
+ /* --- 16 byte alignment boundary--- */
+
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int64_t a;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t b;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t c;
+
+ /* ---- 16 byte alignment boundary---- */
+
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int64_t privA;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privB;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privC;
+};
+
+
+/**
+ * Common function for use by Text Provider implementations to allocate and/or initialize
+ * a new UText struct. To be called in the implementation of utext_open() functions.
+ * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
+ * If the supplied UText is already open, the provider's close function will be called
+ * so that the struct can be reused by the open that is in progress.
+ *
+ * @param ut pointer to a UText struct to be re-used, or null if a new UText
+ * should be allocated.
+ * @param extraSpace The amount of additional space to be allocated as part
+ * of this UText, for use by types of providers that require
+ * additional storage.
+ * @param status Errors are returned here.
+ * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+
+/**
+ * Value stored in the magic field of correctly initialized UText structs.
+ * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
+ * @internal
+ */
+enum {
+ UTEXT_MAGIC = 0x345ad82c
+};
+
+/**
+ * initializer to be used with local (stack) instances of a UText
+ * struct. UText structs must be initialized before passing
+ * them to one of the utext_open functions.
+ * The values are positional and follow the field order of struct UText.
+ * @stable ICU 3.6
+ */
+#define UTEXT_INITIALIZER { \
+ UTEXT_MAGIC, /* magic */ \
+ 0, /* flags */ \
+ 0, /* providerProperties */ \
+ sizeof(UText), /* sizeOfStruct */ \
+ 0, /* chunkNativeLimit */ \
+ 0, /* extraSize */ \
+ 0, /* nativeIndexingLimit */ \
+ 0, /* chunkNativeStart */ \
+ 0, /* chunkOffset */ \
+ 0, /* chunkLength */ \
+ NULL, /* chunkContents */ \
+ NULL, /* pFuncs */ \
+ NULL, /* pExtra */ \
+ NULL, /* context */ \
+ NULL, NULL, NULL, /* p, q, r */ \
+ NULL, /* privP */ \
+ 0, 0, 0, /* a, b, c */ \
+ 0, 0, 0 /* privA, privB, privC */ \
+ }
+
+
+U_CDECL_END
+
+
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/utf.h b/jni/EastAsianWidth/unicode/utf.h
new file mode 100644
index 0000000..1682283
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/utf.h
@@ -0,0 +1,227 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * The UChar and UChar32 data types for Unicode code units and code points
+ * are defined in umachine.h because they can be machine-dependent.
+ *
+ * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
+ * common definitions. Those files define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of surrogate code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h also defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ * (It is an accidental property of Unicode and UTF-16 that all
+ * malformed sequences can be expressed unambiguously with a distinct subrange
+ * of Unicode code points.)
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default. This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility. Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t. UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself. utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_... Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence. Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter. It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE. For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used. UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8: These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). Usage: ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ * Tracing and Threads: Tracing functions are global to a process, and
+ * will be called in response to ICU operations performed by any
+ * thread. If tracing of an individual thread is desired, the
+ * tracing functions must themselves filter by checking that the
+ * current thread is the desired thread.
+ *
+ * @param context an uninterpreted pointer. Whatever is passed in
+ * here will in turn be passed to each of the tracing
+ * functions UTraceEntry, UTraceExit and UTraceData.
+ * ICU does not use or alter this pointer.
+ * @param e Callback function to be called on entry to a
+ * traced ICU function.
+ * @param x Callback function to be called on exit from a
+ * traced ICU function.
+ * @param d Callback function to be called from within a
+ * traced ICU function, for the purpose of providing
+ * data to the trace.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_setFunctions(const void *context,
+ UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+ * Get the currently installed ICU tracing functions. Note that a null function
+ * pointer will be returned if no trace function has been set.
+ *
+ * @param context The currently installed tracing context.
+ * @param e The currently installed UTraceEntry function.
+ * @param x The currently installed UTraceExit function.
+ * @param d The currently installed UTraceData function.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_getFunctions(const void **context,
+ UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer. Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer. Also a 21 bit Unicode code point value.
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination. Works for vectors of all types.
+ *
+ * Note: %vS is a vector of (UChar *) strings. The strings must
+ * be nul terminated as there is no way to provide a
+ * separate length parameter for each string. The length
+ * parameter (required for all vectors) is the number of
+ * strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ * UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ * arguments without needing variable declarations in the examples;
+ * the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "There is a character %c in the string %s.", // Format String
+ * (char)c, (const char *)s); // varargs parameters
+ * -> There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "Vector of bytes %vb vector of chars %vc",
+ * (const uint8_t *)bytes, (int32_t)bytesLength,
+ * (const char *)chars, (int32_t)charsLength);
+ * -> Vector of bytes
+ * 42 63 64 3f [4]
+ * vector of chars
+ * "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "An int32_t %d and a whole bunch of them %vd",
+ * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * -> An int32_t 0xfffffffb and a whole bunch of them
+ * fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * back to this function to format the trace output in a
+ * human readable form. Note that a UTraceData function may choose
+ * to not format the data; it could, for example, save it in
+ * the raw form it was received (more compact), leaving
+ * formatting for a later trace analysis tool.
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param args Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, va_list args);
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * this function to format any additional trace data, beyond that
+ * provided by default, in human readable form with the same
+ * formatting conventions used by utrace_vformat().
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param ... Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/utypes.h b/jni/EastAsianWidth/unicode/utypes.h
new file mode 100644
index 0000000..4c07c10
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/utypes.h
@@ -0,0 +1,790 @@
+/*
+**********************************************************************
+* Copyright (C) 1996-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+* Date Name Description
+* 12/11/96 helena Creation.
+* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
+* uint8, uint16, and uint32.
+* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
+* well as C++.
+* Modified to use memcpy() for uprv_arrayCopy() fns.
+* 04/14/97 aliu Added TPlatformUtilities.
+* 05/07/97 aliu Added import/export specifiers (replacing the old
+* broken EXT_CLASS). Added version number for our
+* code. Cleaned up header.
+* 6/20/97 helena Java class name change.
+* 08/11/98 stephen UErrorCode changed from typedef to enum
+* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
+* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+* 04/20/99 stephen Cleaned up & reworked for autoconf.
+* Renamed to utypes.h.
+* 05/05/99 stephen Changed to use These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform. Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available. The list of "invariant characters" is: This matches the IBM Syntactic Character Set (CS 640). In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:UnicodeString according to the specified normalization mode.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The options parameter specifies which optional
+ * Normalizer features are to be enabled for this operation.
+ *
+ * @param source the input string to be normalized.
+ * @param mode the normalization mode
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The normalized string (on output).
+ * @param status The error code.
+ * @stable ICU 2.0
+ */
+ static void U_EXPORT2 normalize(const UnicodeString& source,
+ UNormalizationMode mode, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Compose a UnicodeString.
+ * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The options parameter specifies which optional
+ * Normalizer features are to be enabled for this operation.
+ *
+ * @param source the string to be composed.
+ * @param compat Perform compatibility decomposition before composition.
+ * If this argument is FALSE, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The composed string (on output).
+ * @param status The error code.
+ * @stable ICU 2.0
+ */
+ static void U_EXPORT2 compose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Static method to decompose a UnicodeString.
+ * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The options parameter specifies which optional
+ * Normalizer features are to be enabled for this operation.
+ *
+ * @param source the string to be decomposed.
+ * @param compat Perform compatibility decomposition.
+ * If this argument is FALSE, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The decomposed string (on output).
+ * @param status The error code.
+ * @stable ICU 2.0
+ */
+ static void U_EXPORT2 decompose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Performing quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
+ *
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @stable ICU 2.0
+ */
+ static inline UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
+
+ /**
+ * Performing quick check on a string; same as the other version of quickCheck
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param options the optional features to be enabled (0 for no options)
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @stable ICU 2.6
+ */
+ static inline UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
+
+ /**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a TRUE/FALSE result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @stable ICU 2.2
+ */
+ static inline UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
+
+ /**
+ * Test if a string is in a given normalization form; same as the other version of isNormalized
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @stable ICU 2.6
+ */
+ static inline UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
+
+ /**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * For details see unorm_concatenate in unorm.h.
+ *
+ * @param left Left source string.
+ * @param right Right source string.
+ * @param result The output string.
+ * @param mode The normalization mode.
+ * @param options A bit set of normalization options.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return result
+ *
+ * @see unorm_concatenate
+ * @see normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @stable ICU 2.1
+ */
+ static UnicodeString &
+ U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
+ UnicodeString &result,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode &errorCode);
+
+ /**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param s2 Second source string.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_compare
+ * @see normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+ static inline int32_t
+ compare(const UnicodeString &s1, const UnicodeString &s2,
+ uint32_t options,
+ UErrorCode &errorCode);
+
+ //-------------------------------------------------------------------------
+ // Iteration API
+ //-------------------------------------------------------------------------
+
+ /**
+ * Return the current character in the normalized text.
+ * current() may need to normalize some text at getIndex().
+ * The getIndex() is not changed.
+ *
+ * @return the current normalized code point
+ * @stable ICU 2.0
+ */
+ UChar32 current(void);
+
+ /**
+ * Return the first character in the normalized text.
+ * This is equivalent to setIndexOnly(startIndex()) followed by next().
+ * (Post-increment semantics.)
+ *
+ * @return the first normalized code point
+ * @stable ICU 2.0
+ */
+ UChar32 first(void);
+
+ /**
+ * Return the last character in the normalized text.
+ * This is equivalent to setIndexOnly(endIndex()) followed by previous().
+ * (Pre-decrement semantics.)
+ *
+ * @return the last normalized code point
+ * @stable ICU 2.0
+ */
+ UChar32 last(void);
+
+ /**
+ * Return the next character in the normalized text.
+ * (Post-increment semantics.)
+ * If the end of the text has already been reached, DONE is returned.
+ * The DONE value could be confused with a U+FFFF non-character code point
+ * in the text. If this is possible, you can test getIndex()CharacterIterator or the start (i.e. index 0) of the string
+ * over which this Normalizer is iterating.
+ *
+ * @return the smallest index in the input text where the Normalizer operates
+ * @stable ICU 2.0
+ */
+ int32_t startIndex(void) const;
+
+ /**
+ * Retrieve the index of the end of the input text. This is the end index
+ * of the CharacterIterator or the length of the string
+ * over which this Normalizer is iterating.
+ * This end index is exclusive, i.e., the Normalizer operates only on characters
+ * before this index.
+ *
+ * @return the first index in the input text where the Normalizer does not operate
+ * @stable ICU 2.0
+ */
+ int32_t endIndex(void) const;
+
+ /**
+ * Returns TRUE when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @stable ICU 2.0
+ */
+ UBool operator==(const Normalizer& that) const;
+
+ /**
+ * Returns FALSE when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const Normalizer& that) const;
+
+ /**
+ * Returns a pointer to a new Normalizer that is a clone of this one.
+ * The caller is responsible for deleting the new clone.
+ * @return a pointer to a new Normalizer
+ * @stable ICU 2.0
+ */
+ Normalizer* clone(void) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ *
+ * @return the hash code
+ * @stable ICU 2.0
+ */
+ int32_t hashCode(void) const;
+
+ //-------------------------------------------------------------------------
+ // Property access methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Set the normalization mode for this object.
+ *
+ * @param newMode the new mode for this Normalizer.
+ * @see #getUMode
+ * @stable ICU 2.0
+ */
+ void setMode(UNormalizationMode newMode);
+
+ /**
+ * Return the normalization mode for this object.
+ *
+ * This is an unusual name because there used to be a getMode() that
+ * returned a different type.
+ *
+ * @return the mode for this Normalizer
+ * @see #setMode
+ * @stable ICU 2.0
+ */
+ UNormalizationMode getUMode(void) const;
+
+ /**
+ * Set options that affect this Normalizer's operation.
+ * Options do not change the basic composition or decomposition operation
+ * that is being performed, but they control whether
+ * certain optional portions of the operation are done.
+ * Currently the only available option is obsolete.
+ *
+ * It is possible to specify multiple options that are all turned on or off.
+ *
+ * @param option the option(s) whose value is/are to be set.
+ * @param value the new setting for the option. Use TRUE to
+ * turn the option(s) on and FALSE to turn it/them off.
+ *
+ * @see #getOption
+ * @stable ICU 2.0
+ */
+ void setOption(int32_t option,
+ UBool value);
+
+ /**
+ * Determine whether an option is turned on or off.
+ * If multiple options are specified, then the result is TRUE if any
+ * of them are set.
+ * Normalizer will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param status a UErrorCode
+ * @stable ICU 2.0
+ */
+ void setText(const UnicodeString& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this Normalizer will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a CharacterIterator object that replaces the current input text
+ * @param status a UErrorCode
+ * @stable ICU 2.0
+ */
+ void setText(const CharacterIterator& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this Normalizer will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @param status a UErrorCode
+ * @stable ICU 2.0
+ */
+ void setText(const UChar* newText,
+ int32_t length,
+ UErrorCode &status);
+ /**
+ * Copies the input text into the UnicodeString argument.
+ *
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ void getText(UnicodeString& result);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ * @return a UClassID for this class.
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @return a UClassID for the actual class.
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ //-------------------------------------------------------------------------
+ // Private functions
+ //-------------------------------------------------------------------------
+
+ Normalizer(); // default constructor not implemented
+ Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
+
+ // Private utility methods for iteration
+ // For documentation, see the source code
+ UBool nextNormalize();
+ UBool previousNormalize();
+
+ void init(CharacterIterator *iter);
+ void clearBuffer(void);
+
+ //-------------------------------------------------------------------------
+ // Private data
+ //-------------------------------------------------------------------------
+
+ UNormalizationMode fUMode;
+ int32_t fOptions;
+
+ // The input text and our position in it
+ UCharIterator *text;
+
+ // The normalization buffer is the result of normalization
+ // of the source in [currentIndex..nextIndex[ .
+ int32_t currentIndex, nextIndex;
+
+ // A buffer for holding intermediate results
+ UnicodeString buffer;
+ int32_t bufferPos;
+
+};
+
+//-------------------------------------------------------------------------
+// Inline implementations
+//-------------------------------------------------------------------------
+
+// Inequality is the logical negation of operator==.
+inline UBool
+Normalizer::operator!= (const Normalizer& other) const
+{
+    const UBool same = this->operator==(other);
+    return !same;
+}
+
+// Forwards to unorm_quickCheck() on the string's buffer and length.
+// If status already holds a failure, nothing is checked and UNORM_MAYBE is returned.
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode,
+                       UErrorCode &status) {
+    return U_FAILURE(status)
+        ? UNORM_MAYBE
+        : unorm_quickCheck(source.getBuffer(), source.length(), mode, &status);
+}
+
+// Options-taking variant: forwards to unorm_quickCheckWithOptions().
+// If status already holds a failure, nothing is checked and UNORM_MAYBE is returned.
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    return U_FAILURE(status)
+        ? UNORM_MAYBE
+        : unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
+                                      mode, options, &status);
+}
+
+// Forwards to unorm_isNormalized() on the string's buffer and length.
+// The answer stays FALSE when status already holds a failure on entry.
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode,
+                         UErrorCode &status) {
+    UBool normalized = FALSE;
+    if(!U_FAILURE(status)) {
+        normalized = unorm_isNormalized(source.getBuffer(), source.length(),
+                                        mode, &status);
+    }
+    return normalized;
+}
+
+// Options-taking variant: forwards to unorm_isNormalizedWithOptions().
+// The answer stays FALSE when status already holds a failure on entry.
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    UBool normalized = FALSE;
+    if(!U_FAILURE(status)) {
+        normalized = unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
+                                                   mode, options, &status);
+    }
+    return normalized;
+}
+
+// Thin wrapper over unorm_compare() for two UnicodeString operands.
+inline int32_t
+Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
+                    uint32_t options,
+                    UErrorCode &errorCode) {
+    // Deliberately no validation here: unorm_compare does all argument checking.
+    const int32_t length1 = s1.length();
+    const int32_t length2 = s2.length();
+    return unorm_compare(s1.getBuffer(), length1,
+                         s2.getBuffer(), length2,
+                         options, &errorCode);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif // NORMLZR_H
diff --git a/jni/EastAsianWidth/unicode/parseerr.h b/jni/EastAsianWidth/unicode/parseerr.h
new file mode 100644
index 0000000..44ff008
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/parseerr.h
@@ -0,0 +1,92 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 03/14/00 aliu Creation.
+* 06/27/00 aliu Change from C++ class to C struct
+**********************************************************************
+*/
+#ifndef PARSEERR_H
+#define PARSEERR_H
+
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C API: Parse Error Information
+ */
+/**
+ * The capacity of the context strings in UParseError.
+ * Declared as an anonymous enum so that the value is an integral
+ * constant expression in both C and C++.
+ * @stable ICU 2.0
+ */
+enum { U_PARSE_CONTEXT_LEN = 16 };
+
+/**
+ * A UParseError struct is used to return detailed information about
+ * parsing errors. It is used by ICU parsing engines that parse long
+ * rules, patterns, or programs, where the text being parsed is long
+ * enough that more information than a UErrorCode is needed to
+ * localize the error.
+ *
+ *
+ * preContext postContext
+ * "" "" The parser does not support context
+ * "let " "=7" Pre- and post-context only
+ * "let " "for=7" Pre- and post-context and error text
+ * "" "for" Error text only
+ *
+ *
+ * ParsePosition is a simple class used by Format
+ * and its subclasses to keep track of the current position during parsing.
+ * The parseObject method in the various Format
+ * classes requires a ParsePosition object as an argument.
+ *
+ * ParsePosition, since the index parameter
+ * records the current position.
+ *
+ * The ParsePosition class is not suitable for subclassing.
+ *
+ * @version 1.3 10/30/97
+ * @author Mark Davis, Helena Shih
+ * @see java.text.Format
+ */
+
+class U_COMMON_API ParsePosition : public UObject {
+public:
+ /**
+ * Default constructor, the index starts with 0 as default.
+ * The error index starts at -1 (no error recorded).
+ * @stable ICU 2.0
+ */
+ ParsePosition()
+ : UObject(),
+ index(0),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Create a new ParsePosition with the given initial index.
+ * The error index starts at -1 (no error recorded).
+ * @param newIndex the new text offset.
+ * @stable ICU 2.0
+ */
+ ParsePosition(int32_t newIndex)
+ : UObject(),
+ index(newIndex),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Copy constructor
+ * @param copy the object to be copied from.
+ * @stable ICU 2.0
+ */
+ ParsePosition(const ParsePosition& copy)
+ : UObject(copy),
+ index(copy.index),
+ errorIndex(copy.errorIndex)
+ {}
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~ParsePosition();
+
+ /**
+ * Assignment operator
+ * @param copy the object whose index and error index are copied.
+ * @return a reference to this object.
+ * @stable ICU 2.0
+ */
+ ParsePosition& operator=(const ParsePosition& copy);
+
+ /**
+ * Equality operator.
+ * @param that the object to compare with.
+ * @return TRUE if the two parse positions are equal, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool operator==(const ParsePosition& that) const;
+
+ /**
+ * Inequality operator.
+ * @param that the object to compare with.
+ * @return TRUE if the two parse positions are not equal, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool operator!=(const ParsePosition& that) const;
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ParsePosition *clone() const;
+
+ /**
+ * Retrieve the current parse position. On input to a parse method, this
+ * is the index of the character at which parsing will begin; on output, it
+ * is the index of the character following the last character parsed.
+ * @return the current index.
+ * @stable ICU 2.0
+ */
+ int32_t getIndex(void) const;
+
+ /**
+ * Set the current parse position.
+ * @param index the new index.
+ * @stable ICU 2.0
+ */
+ void setIndex(int32_t index);
+
+ /**
+ * Set the index at which a parse error occurred. Formatters
+ * should set this before returning an error code from their
+ * parseObject method. The default value is -1 if this is not
+ * set.
+ * @param ei the new error index.
+ * @stable ICU 2.0
+ */
+ void setErrorIndex(int32_t ei);
+
+ /**
+ * Retrieve the index at which an error occurred, or -1 if the
+ * error index has not been set.
+ * @return the error index.
+ * @stable ICU 2.0
+ */
+ int32_t getErrorIndex(void) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ /**
+ * Input: the place you start parsing.
+ *
+ * Output: position where the parse stopped.
+ * This is designed to be used serially,
+ * with each call setting index up for the next one.
+ */
+ int32_t index;
+
+ /**
+ * The index at which a parse error occurred.
+ * Initialized to -1 by the non-copy constructors.
+ */
+ int32_t errorIndex;
+
+};
+
+// Member-wise assignment of the two position fields.
+inline ParsePosition&
+ParsePosition::operator=(const ParsePosition& copy)
+{
+    // The two fields are independent, so the order of copying is irrelevant.
+    this->errorIndex = copy.errorIndex;
+    this->index = copy.index;
+    return *this;
+}
+
+// Two positions are equal only when both the parse index and the
+// error index match.
+inline UBool
+ParsePosition::operator==(const ParsePosition& copy) const
+{
+    return (index == copy.index && errorIndex == copy.errorIndex) ? TRUE : FALSE;
+}
+
+// Inequality is the negation of operator==.
+inline UBool
+ParsePosition::operator!=(const ParsePosition& copy) const
+{
+    if(operator==(copy)) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// Accessor for the stored parse index.
+inline int32_t
+ParsePosition::getIndex() const
+{
+    return this->index;
+}
+
+// Stores a new parse index; the error index is left untouched.
+inline void
+ParsePosition::setIndex(int32_t offset)
+{
+    index = offset;
+}
+
+// Accessor for the stored error index.
+inline int32_t
+ParsePosition::getErrorIndex() const
+{
+    return this->errorIndex;
+}
+
+// Stores a new error index; the parse index is left untouched.
+inline void
+ParsePosition::setErrorIndex(int32_t ei)
+{
+    errorIndex = ei;
+}
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/platform.h b/jni/EastAsianWidth/unicode/platform.h
new file mode 100644
index 0000000..7f7a0c0
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/platform.h
@@ -0,0 +1,325 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : platform.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief Basic types for the platform
+ */
+
+/*
+ * Define the platform we're on.
+ * U_LINUX is defined without a value; the #ifndef guard leaves an
+ * existing definition untouched.
+ */
+#ifndef U_LINUX
+#define U_LINUX
+#endif
+
+/*
+ * Define whether inttypes.h is available.
+ * Defaults to 1 unless U_HAVE_INTTYPES_H was already defined before
+ * this point.
+ */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 1
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ * If U_IOSTREAM_SOURCE is set to 199711, then length
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for length
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating NUL.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to length
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for length
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating NUL.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/pwin32.h b/jni/EastAsianWidth/unicode/pwin32.h
new file mode 100644
index 0000000..9aad353
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/pwin32.h
@@ -0,0 +1,311 @@
+/*
+ ******************************************************************************
+ *
+ * Copyright (C) 1997-2007, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ ******************************************************************************
+ *
+ * FILE NAME : platform.h
+ *
+ * Date Name Description
+ * 05/13/98 nos Creation (content moved here from ptypes.h).
+ * 03/02/99 stephen Added AS400 support.
+ * 03/30/99 stephen Added Linux support.
+ * 04/13/99 stephen Reworked for autoconf.
+ ******************************************************************************
+ */
+
+ /**
+ * \file
+ * \brief Configuration constants for the Windows platform
+ */
+
+/*
+ * Define the platform we're on.
+ * U_WINDOWS is defined without a value; the #ifndef guard leaves an
+ * existing definition untouched.
+ */
+#ifndef U_WINDOWS
+#define U_WINDOWS
+#endif
+
+/*
+ * Borland C++ overrides: placement new is marked unavailable,
+ * inttypes.h is marked available, and __STDC_CONSTANT_MACROS is defined.
+ * NOTE(review): __STDC_CONSTANT_MACROS is presumably needed so the C99
+ * integer-constant macros are visible from C++ -- confirm.
+ */
+#if defined(__BORLANDC__)
+#define U_HAVE_PLACEMENT_NEW 0
+#define U_HAVE_INTTYPES_H 1
+#define __STDC_CONSTANT_MACROS
+#endif
+
+/*
+ * _MSC_VER is used to detect the Microsoft compiler.
+ * U_INT64_IS_LONG_LONG is 0 for the Microsoft compiler and 1 for every
+ * other compiler that includes this header.
+ */
+#if defined(_MSC_VER)
+#define U_INT64_IS_LONG_LONG 0
+#else
+#define U_INT64_IS_LONG_LONG 1
+#endif
+
+/*
+ * Define whether inttypes.h is available.
+ * The Borland block above already sets this to 1, so the default of 0
+ * only takes effect when nothing defined it earlier.
+ */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 0
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ * If U_IOSTREAM_SOURCE is set to 199711, then getRuleStatus() can be called after obtaining a boundary
+ * position from next(), previous(), or
+ * any other break iterator functions that returns a boundary position.
+ * Replaceable is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters. It is used by APIs that
+ * change a piece of text while retaining metadata. Metadata is data
+ * other than the Unicode characters returned by char32At(). One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * Replaceable API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters. For example, if the string "the bold
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * strong font".
+ *
+ * Replaceable specifies ranges using a start
+ * offset and a limit offset. The range of characters thus specified
+ * includes the characters at offset start..limit-1. That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * Replaceable also includes API to access characters
+ * in the string: length(), charAt(),
+ * char32At(), and extractBetween().
+ *
+ * replace() is the following:
+ *
+ *
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Replaceable();
+
+ /**
+ * Returns the number of 16-bit code units in the text.
+ * Non-virtual wrapper; forwards to getLength().
+ * @return number of 16-bit code units in text
+ * @stable ICU 1.8
+ */
+ inline int32_t length() const;
+
+ /**
+ * Returns the 16-bit code unit at the given offset into the text.
+ * Non-virtual wrapper; forwards to getCharAt().
+ * @param offset an integer between 0 and
+ * length()-1
+ * inclusive
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Returns the 32-bit code point at the given 16-bit offset into
+ * the text. This assumes the text is stored as 16-bit code units
+ * with surrogate pairs intermixed. If the offset of a leading or
+ * trailing code unit of a surrogate pair is given, return the
+ * code point of the surrogate pair.
+ * Non-virtual wrapper; forwards to getChar32At().
+ *
+ * @param offset an integer between 0 and length()-1
+ * inclusive
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Copies characters in the range [start, limit)
+ * into the UnicodeString target.
+ * The function itself returns nothing; the result is delivered
+ * through target.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.1
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const = 0;
+
+ /**
+ * Replaces a substring of this object with the given text. If the
+ * characters being replaced have metadata, the new characters
+ * that replace them should be given the same metadata.
+ *
+ * @param start the beginning index, inclusive; 0 <= start
+ * <= limit.
+ * @param limit the ending index, exclusive; start <= limit
+ * <= length().
+ * @param text the text to replace characters start
+ * to limit - 1
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) = 0;
+ // Note: All other methods in this class take the names of
+ // existing UnicodeString methods. This method is the exception.
+ // It is named differently because all replace methods of
+ // UnicodeString return a UnicodeString&. The 'between' is
+ // required in order to conform to the UnicodeString naming
+ // convention; API taking start/length are named 'operation', and
+ // those taking start/limit are named 'operationBetween'. The
+ // 'handle' is added because 'replaceBetween' and
+ // 'replaceBetweenAndSelect' are now defined.
+
+ /**
+ * Copies a substring of this object, retaining metadata.
+ * This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; 0 <= start <=
+ * limit.
+ * @param limit the ending index, exclusive; start <= limit <=
+ * length().
+ * @param dest the destination index. The characters from
+ * start..limit-1 will be copied to dest.
+ * Implementations of this method may assume that dest <= start ||
+ * dest >= limit.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+ /**
+ * Returns true if this object contains metadata. If a
+ * Replaceable object has metadata, calls to the Replaceable API
+ * must be made so as to preserve metadata. If it does not, calls
+ * to the Replaceable API may be optimized to improve performance.
+ * The default implementation returns true.
+ * @return true if this object contains metadata
+ * @stable ICU 2.2
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+protected:
+
+ /**
+ * Default constructor.
+ * @stable ICU 2.4
+ */
+ Replaceable();
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ Replaceable &Replaceable::operator=(const Replaceable &);
+ */
+
+ /**
+ * Virtual version of length().
+ * @return number of 16-bit code units in text
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const = 0;
+
+ /**
+ * Virtual version of charAt().
+ * @param offset offset of the requested 16-bit code unit
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const = 0;
+
+ /**
+ * Virtual version of char32At().
+ * @param offset 16-bit offset of the requested code point
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+// Non-virtual front end: the subclass supplies the value via getLength().
+inline int32_t
+Replaceable::length() const {
+    const int32_t unitCount = getLength();
+    return unitCount;
+}
+
+// Non-virtual front end: the subclass supplies the value via getCharAt().
+inline UChar
+Replaceable::charAt(int32_t offset) const {
+    const UChar unit = getCharAt(offset);
+    return unit;
+}
+
+// Non-virtual front end: the subclass supplies the value via getChar32At().
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+    const UChar32 codePoint = getChar32At(offset);
+    return codePoint;
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/resbund.h b/jni/EastAsianWidth/unicode/resbund.h
new file mode 100644
index 0000000..6d6b991
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/resbund.h
@@ -0,0 +1,485 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2007, International Business Machines Corporation
+* and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File resbund.h
+*
+* CREATED BY
+* Richard Gillam
+*
+* Modification History:
+*
+* Date Name Description
+* 2/5/97 aliu Added scanForLocaleInFile. Added
+* constructor which attempts to read resource bundle
+* from a specific file, without searching other files.
+* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
+* infinite loops in scanForFile and scanForLocale.
+* Modified getRawResourceData to not delete storage
+* in localeData and resourceData which it doesn't own.
+* Added Mac compatibility #ifdefs for tellp() and
+* ios::nocreate.
+* 2/18/97 helena Updated with 100% documentation coverage.
+* 3/13/97 aliu Rewrote to load in entire resource bundle and store
+* it as a Hashtable of ResourceBundleData objects.
+* Added state table to govern parsing of files.
+* Modified to load locale index out of new file
+* distinct from default.txt.
+* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone
+* data. Added support for custom file suffixes. Again,
+* needed to support timezone data.
+* 4/7/97 aliu Cleaned up.
+* 03/02/99 stephen Removed dependency on FILE*.
+* 03/29/99 helena Merged Bertrand and Stephen's changes.
+* 06/11/99 stephen Removed parsing of .txt files.
+* Reworked to use new binary format.
+* Cleaned up.
+* 06/14/99 stephen Removed methods taking a filename suffix.
+* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
+******************************************************************************
+*/
+
+#ifndef RESBUND_H
+#define RESBUND_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+/**
+ * \file
+ * \brief C++ API: Resource Bundle
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A class representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale-specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param locale This is the locale this resource bundle is for. To get resources
+ * for the French locale, for example, you would create a
+ * ResourceBundle passing Locale::FRENCH for the "locale" parameter,
+ * and all subsequent calls to that resource bundle will return
+ * resources that pertain to the French locale. If the caller doesn't
+ * pass a locale parameter, the default locale for the system (as
+ * returned by Locale::getDefault()) will be used.
+ * @param err The Error Code.
+ * The UErrorCode& err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational error results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data was used; neither the requested locale nor any of its
+ * fall back locales could be found.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the default bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the ICU default bundle.
+ *
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UErrorCode &err);
+
+ /**
+ * Standard constructor, constructs a resource bundle for the locale-specific
+ * bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * NULL is used to refer to ICU data.
+ * @param locale The locale for which to open a resource bundle.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const char* packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Copy constructor.
+ *
+ * @param original The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const ResourceBundle &original);
+
+ /**
+ * Constructor from a C UResourceBundle. The resource bundle is
+ * copied and not adopted. ures_close will still need to be used on the
+ * original resource bundle.
+ *
+ * @param res A pointer to the C resource bundle.
+ * @param status A UErrorCode value.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UResourceBundle *res,
+ UErrorCode &status);
+
+ /**
+ * Assignment operator.
+ *
+ * @param other The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle&
+ operator=(const ResourceBundle& other);
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ResourceBundle();
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ResourceBundle *clone() const;
+
+ /**
+ * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
+ * the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ *
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+ int32_t
+ getSize(void) const;
+
+ /**
+ * returns a string from a string resource type
+ *
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a warning
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getString(UErrorCode& status) const;
+
+ /**
+ * Returns binary data from a resource. Can be used on most primitive resource types (binaries,
+ * strings, ints).
+ *
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a warning
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const uint8_t*
+ getBinary(int32_t& len, UErrorCode& status) const;
+
+
+ /**
+ * returns an integer vector from a resource.
+ *
+ * @param len fills in the length of resulting integer vector
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a warning
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const int32_t*
+ getIntVector(int32_t& len, UErrorCode& status) const;
+
+ /**
+ * returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a warning
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return an unsigned integer value
+ * @stable ICU 2.0
+ */
+ uint32_t
+ getUInt(UErrorCode& status) const;
+
+ /**
+ * returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a warning
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a signed integer value
+ * @stable ICU 2.0
+ */
+ int32_t
+ getInt(UErrorCode& status) const;
+
+ /**
+ * Checks whether the resource has another element to iterate over.
+ *
+ * @return TRUE if there are more elements, FALSE if there are no more elements
+ * @stable ICU 2.0
+ */
+ UBool
+ hasNext(void) const;
+
+ /**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @stable ICU 2.0
+ */
+ void
+ resetIterator(void);
+
+ /**
+ * Returns the key associated with this resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+ const char*
+ getKey(void) const;
+
+ /**
+ * Gets the locale ID of the resource bundle as a string.
+ * Same as getLocale().getName() .
+ *
+ * @return the locale ID of the resource bundle as a string
+ * @stable ICU 2.0
+ */
+ const char*
+ getName(void) const;
+
+
+ /**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @return type of the given resource.
+ * @stable ICU 2.0
+ */
+ UResType
+ getType(void) const;
+
+ /**
+ * Returns the next resource in a given resource or NULL if there are no more resources
+ *
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ getNext(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(const char ** key,
+ UErrorCode& status);
+
+ /**
+ * Returns the resource in a resource at the specified index.
+ *
+ * @param index an index to the wanted resource.
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param index an index to the wanted string.
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a resource in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted resource
+ * @param status fills in the outgoing error code.
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(const char* key,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a string in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(const char* key,
+ UErrorCode& status) const;
+
+ /**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use getVersion, as this method is going to be deprecated.
+ *
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see getVersion
+ * @deprecated ICU 2.8 Use getVersion instead.
+ */
+ const char*
+ getVersionNumber(void) const;
+
+ /**
+ * Return the version number associated with this ResourceBundle as a UVersionInfo array.
+ *
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+ void
+ getVersion(UVersionInfo versionInfo) const;
+
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ *
+ * @return a Locale object
+ * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
+ */
+ const Locale&
+ getLocale(void) const;
+
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ *
+ * @return a Locale object
+ * @stable ICU 2.8
+ */
+ const Locale
+ getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+ /**
+ * This API implements multilevel fallback
+ * @internal
+ */
+ ResourceBundle
+ getWithFallback(const char* key, UErrorCode& status);
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+ ResourceBundle(); // default constructor not implemented
+
+ UResourceBundle *fResource;
+ void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
+ Locale *fLocale;
+
+};
+
+U_NAMESPACE_END
+#endif
diff --git a/jni/EastAsianWidth/unicode/schriter.h b/jni/EastAsianWidth/unicode/schriter.h
new file mode 100644
index 0000000..d0b5e22
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/schriter.h
@@ -0,0 +1,187 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File schriter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/05/99 stephen Cleaned up.
+******************************************************************************
+*/
+
+#ifndef SCHRITER_H
+#define SCHRITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/chariter.h"
+#include "unicode/uchriter.h"
+
+/**
+ * \file
+ * \brief C++ API: String Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UnicodeString.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UnicodeString, but also to
+ * create one that iterates over only a subrange of a UnicodeString
+ * (iterators over different subranges of the same UnicodeString don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
+public:
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range is the whole string, and the starting position is 0.
+ * @param textStr The unicode string used to create an iterator
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr". The UnicodeString object is copied.
+ * The iteration range is the whole string, and the starting
+ * position is specified by "textPos". If "textPos" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textStr The unicode string used to create an iterator
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textPos);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range begins with the code unit specified by
+ * "textBegin" and ends with the code unit BEFORE the code unit specified
+ * by "textEnd". The starting position is specified by "textPos". If
+ * "textBegin" and "textEnd" don't form a valid range on "textStr" (i.e.,
+ * textBegin >= textEnd or either is negative or greater than the length of textStr),
+ * or "textPos" is outside the range defined by "textBegin" and "textEnd",
+ * the behavior of this iterator is undefined.
+ * @param textStr The unicode string used to create the StringCharacterIterator
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t textPos);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * The UnicodeString object in "that" is copied.
+ * @param that The StringCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const StringCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~StringCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the same
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied.
+ * @return a reference to an iterator; presumably *this, per assignment convention (confirm in the implementation).
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator&
+ operator=(const StringCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Returns a new StringCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the newly cloned object.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone(void) const;
+
+ /**
+ * Sets the iterator to iterate over the provided string.
+ * @param newText The string to be iterated over
+ * @stable ICU 2.0
+ */
+ void setText(const UnicodeString& newText);
+
+ /**
+ * Copies the UnicodeString under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result);
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+protected:
+ /**
+ * Default constructor, iteration over empty string.
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator();
+
+ /**
+ * Sets the iterator to iterate over the provided string.
+ * @param newText The string to be iterated over
+ * @param newTextLength The length of the String
+ * @stable ICU 2.0
+ */
+ void setText(const UChar* newText, int32_t newTextLength);
+
+ /**
+ * Copy of the iterated string object.
+ * @stable ICU 2.0
+ */
+ UnicodeString text;
+
+};
+
+U_NAMESPACE_END
+#endif
diff --git a/jni/EastAsianWidth/unicode/strenum.h b/jni/EastAsianWidth/unicode/strenum.h
new file mode 100644
index 0000000..ce42195
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/strenum.h
@@ -0,0 +1,271 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: String Enumeration
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api. Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * Bidi algorithm for ICU
+ *
+ * This is an implementation of the Unicode Bidirectional algorithm.
+ * The algorithm is defined in the
+ * Unicode Standard Annex #9,
+ * version 13, also described in The Unicode Standard, Version 4.0. General remarks about the API:
+ *
+ * In functions with an error code parameter,
+ * the pErrorCode pointer must be valid
+ * and the value that it points to must not indicate a failure before
+ * the function call. Otherwise, the function returns immediately.
+ * After the function call, the value indicates success or failure. Sample code for the ICU Bidi API
+ *
+ * Rendering a paragraph with the ICU Bidi API
+ *
+ * This is (hypothetical) sample code that illustrates
+ * how the ICU Bidi API could be used to render a paragraph of text.
+ * Rendering code depends highly on the graphics system,
+ * therefore this sample code must make a lot of assumptions,
+ * which may or may not match any existing graphics system's properties.
+ *
+ *
+ *
+ *
+ *
+ * \code
+ *#include "unicode/ubidi.h"
+ *
+ *typedef enum {
+ * styleNormal=0, styleSelected=1,
+ * styleBold=2, styleItalics=4,
+ * styleSuper=8, styleSub=16
+ *} Style;
+ *
+ *typedef struct { int32_t limit; Style style; } StyleRun;
+ *
+ *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
+ * const StyleRun *styleRuns, int styleRunCount);
+ *
+ * // set *pLimit and *pStyleRunLimit for a line
+ * // from text[start] and from styleRuns[styleRunStart]
+ * // using ubidi_getLogicalRun(para, ...)
+ *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
+ * UBiDi *para,
+ * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
+ * int *pLineWidth);
+ *
+ * // render runs on a line sequentially, always from left to right
+ *
+ * // prepare rendering a new line
+ * void startLine(UBiDiDirection textDirection, int lineWidth);
+ *
+ * // render a run of text and advance to the right by the run width
+ * // the text[start..limit-1] is always in logical order
+ * void renderRun(const UChar *text, int32_t start, int32_t limit,
+ * UBiDiDirection textDirection, Style style);
+ *
+ * // We could compute a cross-product
+ * // from the style runs with the directional runs
+ * // and then reorder it.
+ * // Instead, here we iterate over each run type
+ * // and render the intersections -
+ * // with shortcuts in simple (and common) cases.
+ * // renderParagraph() is the main function.
+ *
+ * // render a directional run with
+ * // (possibly) multiple style runs intersecting with it
+ * void renderDirectionalRun(const UChar *text,
+ * int32_t start, int32_t limit,
+ * UBiDiDirection direction,
+ * const StyleRun *styleRuns, int styleRunCount) {
+ * int i;
+ *
+ * // iterate over style runs
+ * if(direction==UBIDI_LTR) {
+ * int styleLimit;
+ *
+ * for(i=0; i
+ */
+
+/*DOCXX_TAG*/
+/*@{*/
+
+/**
+ * UBiDiLevel is the type of the level values in this
+ * Bidi implementation.
+ * It holds an embedding level and indicates the visual direction
+ * by its bit 0 (even/odd value). It can also hold non-level values for the paraLevel and embeddingLevels
+ * arguments of ubidi_setPara(); there:
+ *
+ *
+ *
+ * @see ubidi_setPara
+ *
+ * Bit 7 of an embeddingLevels[]
+ * value indicates whether the using application is
+ * specifying the level of a character to override whatever the
+ * Bidi implementation would resolve it to. paraLevel can be set to the
+ * pseudo-level values UBIDI_DEFAULT_LTR
+ * and UBIDI_DEFAULT_RTL. UBIDI_DEFAULT_XXX can be used to specify
+ * a default for the paragraph level for
+ * when the ubidi_setPara() function
+ * shall determine it but there is no
+ * strongly typed character in the input. Note that the value for UBIDI_DEFAULT_LTR is even
+ * and the one for UBIDI_DEFAULT_RTL is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
+ * @see UBIDI_DEFAULT_LTR
+ * @see UBIDI_DEFAULT_RTL
+ * @see UBIDI_LEVEL_OVERRIDE
+ * @see UBIDI_MAX_EXPLICIT_LEVEL
+ * @stable ICU 2.0
+ */
+typedef uint8_t UBiDiLevel;
+
+/** Paragraph level setting. If this value is used with reordering mode UBIDI_REORDER_INVERSE_LIKE_DIRECT or
+ * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, the direction will be LTR otherwise. If option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_LTR 0xfe
+
+/** Paragraph level setting. If this value is used with reordering mode UBIDI_REORDER_INVERSE_LIKE_DIRECT or
+ * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise. If option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_RTL 0xff
+
+/**
+ * Maximum explicit embedding level.
+ * (The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1).
+ * @stable ICU 2.0
+ */
+#define UBIDI_MAX_EXPLICIT_LEVEL 61
+
+/** Bit flag for level input.
+ * Overrides directional properties.
+ * @stable ICU 2.0
+ */
+#define UBIDI_LEVEL_OVERRIDE 0x80
+
+/**
+ * Special value which can be returned by the mapping functions when a logical
+ * index has no corresponding visual index or vice-versa. This may happen
+ * for the logical-to-visual mapping of a Bidi control when option
+ * #UBIDI_OPTION_REMOVE_CONTROLS is specified. This can also happen
+ * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
+ * by option #UBIDI_OPTION_INSERT_MARKS.
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getVisualMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getLogicalMap
+ * @stable ICU 3.6
+ */
+#define UBIDI_MAP_NOWHERE (-1)
+
+/**
+ * UBiDiDirection values indicate the text direction.
+ * @stable ICU 2.0
+ */
+enum UBiDiDirection {
+ /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
+ UBIDI_LTR,
+ /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
+ UBIDI_RTL,
+ /** Mixed-directional text. This is a 2 value (implicit, follows UBIDI_RTL). @stable ICU 2.0 */
+ UBIDI_MIXED
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBiDiDirection UBiDiDirection;
+
+/**
+ * Forward declaration of the UBiDi structure for the declaration of
+ * the API functions. Its fields are implementation-specific. ubidi_open() allocates a UBiDi structure.
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by ubidi_setPara()
+ * or the Bidi properties of a line within a paragraph by
+ * ubidi_setLine(). The object can be reused until it is deallocated by calling ubidi_close(). ubidi_setPara() and ubidi_setLine() will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @return An empty UBiDi object.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDi * U_EXPORT2
+ubidi_open(void);
+
+/**
+ * Allocate a UBiDi structure with preallocated memory
+ * for internal structures.
+ * This function provides a UBiDi object like ubidi_open()
+ * with no arguments, but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller. If, e.g., maxRunCount cannot be reasonably predetermined and should not
+ * be set to maxLength (the only failproof value) to avoid
+ * wasting memory, then maxRunCount could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with ubidi_open().
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ * will be preallocated for. An attempt to associate this object with a
+ * longer text will fail, unless this value is 0, which leaves the allocation
+ * up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ * that internal memory will be preallocated for. An attempt to access
+ * visual runs on an object that was not preallocated for as many runs
+ * as the text was actually resolved to will fail,
+ * unless this value is 0, which leaves the allocation up to the implementation.
+ * The number of runs depends on the actual text and may be anywhere between
+ * 1 and maxLength. It is typically small.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return An empty UBiDi object with preallocated memory.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
+
+/**
+ * ubidi_close() must be called to free the memory
+ * associated with a UBiDi object. Important: a parent UBiDi object must not be destroyed or reused if
+ * it still has children.
+ * If a UBiDi object has become the child
+ * of another one (its parent) by calling
+ * ubidi_setLine(), then the child object must
+ * be destroyed (closed) or reused (by calling
+ * ubidi_setPara() or ubidi_setLine())
+ * before the parent object.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_close(UBiDi *pBiDi);
+
+/**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This function
+ * must be called before ubidi_setPara().
+ *
+ * When called with isInverse set to TRUE,
+ * this function changes the behavior of some of the subsequent functions
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence. Output runs should be retrieved using ubidi_getVisualRun().
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * ubidi_getVisualRun() gets the reordered runs, these are actually
+ * the runs of the logically ordered output. Calling this function with argument isInverse set to
+ * TRUE is equivalent to calling
+ * ubidi_setReorderingMode with argument
+ * reorderingMode
+ * set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
+ * Calling this function with argument isInverse set to
+ * FALSE is equivalent to calling
+ * ubidi_setReorderingMode with argument
+ * reorderingMode
+ * set to #UBIDI_REORDER_DEFAULT.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
+
+/**
+ * Is this Bidi object set to perform the inverse Bidi algorithm?
+ * Note: after ubidi_setReorderingMode has been called, this returns TRUE if the reordering mode was set to
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, and FALSE for all other values.
+ * @param pBiDi is a UBiDi object.
+ * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
+ * by handling numbers as L.
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+
+U_STABLE UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi);
+
+/**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This function must be called before ubidi_setPara().
+ * Paragraph separators (B) may appear in the text. Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @param orderParagraphsLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
+
+/**
+ * Is this Bidi object set to allocate level 0 to block separators so that
+ * successive paragraphs progress from left to right?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return TRUE if the Bidi object is set to allocate level 0 to block
+ * separators.
+ *
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
+
+/**
+ * UBiDiReorderingMode values indicate which variant of the Bidi
+ * algorithm to use.
+ *
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingMode {
+ /** Regular Logical to Visual Bidi algorithm according to Unicode.
+ * This is a 0 value.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_DEFAULT = 0,
+ /** Logical to Visual algorithm which handles numbers in a way which
+ * mimics the behavior of Windows XP.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_NUMBERS_SPECIAL,
+ /** Logical to Visual algorithm grouping numbers with adjacent R characters
+ * (reversible algorithm).
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ /** Reorder runs only to transform a Logical LTR string to the Logical RTL
+ * string with the same display, or vice-versa.
+ * If this mode is set together with option
+ * #UBIDI_OPTION_INSERT_MARKS, some Bidi controls in the source
+ * text may be removed and other controls may be added to produce the
+ * minimum combination which has the required display.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_RUNS_ONLY,
+ /** Visual to Logical algorithm which handles numbers like L
+ * (same algorithm as selected by ubidi_setInverse(TRUE)).
+ * @see ubidi_setInverse
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+ /** Visual to Logical algorithm equivalent to the regular Logical to Visual
+ * algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+ /** Inverse Bidi (Visual to Logical) algorithm for the
+ * UBIDI_REORDER_NUMBERS_SPECIAL Bidi algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+ /** Number of values for reordering mode (not itself a mode; it is the last enumerator).
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_COUNT
+} UBiDiReorderingMode;
+
+/**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This function must be called before ubidi_setPara(), and stays
+ * in effect until called again with a different argument.
+ *
+ * With the reordering mode set to a value other than #UBIDI_REORDER_DEFAULT, this function changes the behavior of
+ * some of the subsequent functions in a way such that they implement an
+ * inverse Bidi algorithm or some other algorithm variants.
+ *
+ *
+ * When the reordering mode is set to #UBIDI_REORDER_DEFAULT,
+ * the standard Bidi Logical to Visual algorithm is applied. When it is set to #UBIDI_REORDER_NUMBERS_SPECIAL,
+ * the algorithm used to perform Bidi transformations when calling
+ * ubidi_setPara should approximate the algorithm used in
+ * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
+ * algorithm.
+ *
+ * The differences between the basic algorithm and the algorithm addressed
+ * by this option are as follows:
+ *
+ *
+ * When the reordering mode is set to #UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ * numbers located between LTR text and RTL text are associated with the RTL
+ * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ * upper case letters represent RTL characters) will be transformed to
+ * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ * This makes the algorithm reversible and makes it useful when round trip
+ * (from visual to logical and back to visual) must be achieved without
+ * adding LRM characters. However, this is a variation from the standard
+ * Unicode Bidi algorithm.
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM. When the reordering mode is set to #UBIDI_REORDER_RUNS_ONLY,
+ * a "Logical to Logical" transformation must be performed:
+ *
+ *
+ * This mode may be needed when logical text which is basically Arabic or
+ * Hebrew, with possible included numbers or phrases in English, has to be
+ * displayed as if it had an even embedding level (this can happen if the
+ * displaying application treats all text as if it was basically LTR).
+ * If the default text level of the source text (argument paraLevel
+ * in ubidi_setPara) is even, the source text will be handled as
+ * LTR logical text and will be transformed to the RTL logical text which has
+ * the same LTR visual display.
+ * This mode may also be needed in the reverse case, when logical text which is
+ * basically English, with possible included phrases in Arabic or Hebrew, has to
+ * be displayed as if it had an odd embedding level.
+ *
+ * Both cases could be handled by adding LRE or RLE at the head of the text,
+ * if the display subsystem supports these formatting controls. If it does not,
+ * the problem may be handled by transforming the source text in this mode
+ * before displaying it, so that it will be displayed properly.
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM. When the reordering mode is set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, an "inverse Bidi" algorithm
+ * is applied.
+ * Runs of text with numeric characters will be treated like LTR letters and
+ * may need to be surrounded with LRM characters when they are written in
+ * reordered sequence (the option #UBIDI_INSERT_LRM_FOR_NUMERIC can
+ * be used with function ubidi_writeReordered to this end). This
+ * mode is equivalent to calling ubidi_setInverse() with
+ * argument isInverse set to TRUE. When the reordering mode is set to #UBIDI_REORDER_INVERSE_LIKE_DIRECT, the "direct" Logical to Visual
+ * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
+ * This mode is similar to mode #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
+ * but is closer to the regular Bidi algorithm.
+ *
+ * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ * upper case represents RTL characters) will be transformed to
+ * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ * with mode UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
+ * When used in conjunction with option
+ * #UBIDI_OPTION_INSERT_MARKS, this mode generally
+ * adds Bidi marks to the output significantly more sparingly than mode
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L with option
+ * #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to
+ * ubidi_writeReordered. When the reordering mode is set to #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the Logical to Visual
+ * Bidi algorithm used in Windows XP is used as an approximation of an
+ * "inverse Bidi" algorithm.
+ *
+ * For example, an LTR paragraph with the content "abc FED123" (where
+ * upper case represents RTL characters) will be transformed to
+ * "abc 123DEF". In all "inverse Bidi" reordering modes (those whose name starts with UBIDI_REORDER_INVERSE),
+ * output runs should be retrieved using
+ * ubidi_getVisualRun(), and the output text with
+ * ubidi_writeReordered(). The caller should keep in mind that in
+ * "inverse Bidi" modes the input is actually visually ordered text and
+ * reordered output returned by ubidi_getVisualRun() or
+ * ubidi_writeReordered() are actually runs or character string
+ * of logically ordered output.
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM. Note that option #UBIDI_OUTPUT_REVERSE of
+ * ubidi_writeReordered has no useful meaning and should not be
+ * used in conjunction with any value of the reordering mode specifying
+ * "inverse Bidi" or with value UBIDI_REORDER_RUNS_ONLY.
+ *
+ * @param pBiDi is a UBiDi object.
+ * @param reorderingMode specifies the required variant of the Bidi algorithm.
+ *
+ * @see UBiDiReorderingMode
+ * @see ubidi_setInverse
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
+
+/**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return the current reordering mode of the Bidi object
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+U_STABLE UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi);
+
+/**
+ * UBiDiReorderingOption values indicate which options are
+ * specified to affect the Bidi algorithm.
+ *
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingOption {
+ /**
+ * option value for ubidi_setReorderingOptions:
+ * disable all the options which can be set with this function
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_DEFAULT = 0,
+
+ /**
+ * option bit for ubidi_setReorderingOptions:
+ * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+ * a reordering to a Logical order
+ *
+ * ubidi_setPara.
+ *
+ *
+ * #UBIDI_REORDER_RUNS_ONLY#UBIDI_REORDER_INVERSE_NUMBERS_AS_L#UBIDI_REORDER_INVERSE_LIKE_DIRECT#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL#UBIDI_REORDER_INVERSE_NUMBERS_AS_L or with calling
+ * ubidi_setInverse(TRUE), it implies
+ * option #UBIDI_INSERT_LRM_FOR_NUMERIC
+ * in calls to function ubidi_writeReordered().#UBIDI_OPTION_REMOVE_CONTROLS set before calling
+ * ubidi_setPara() or option #UBIDI_REMOVE_BIDI_CONTROLS
+ * in ubidi_writeReordered), the result will be identical to the
+ * source text in the first transformation.
+ *
+ * #UBIDI_OPTION_REMOVE_CONTROLS. It inhibits option
+ * UBIDI_REMOVE_BIDI_CONTROLS in calls to function
+ * ubidi_writeReordered() and it implies option
+ * #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function
+ * ubidi_writeReordered() if the reordering mode is
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.ubidi_setReorderingOptions:
+ * remove Bidi control characters
+ *
+ * ubidi_setPara.#UBIDI_OPTION_INSERT_MARKS.
+ * It inhibits option #UBIDI_INSERT_LRM_FOR_NUMERIC in calls
+ * to function ubidi_writeReordered() and it implies option
+ * #UBIDI_REMOVE_BIDI_CONTROLS in calls to that function.ubidi_setReorderingOptions:
+ * process the output as part of a stream to be continued
+ *
+ * ubidi_setPara.ubidi_setPara() may process
+ * less than the full source text in order to truncate the text at a meaningful
+ * boundary. The caller should call ubidi_getProcessedLength()
+ * immediately after calling ubidi_setPara() in order to
+ * determine how much of the source text has been processed.
+ * Source text beyond that length should be resubmitted in following calls to
+ * ubidi_setPara. The processed length may be less than
+ * the length of the source text if a character preceding the last character of
+ * the source text constitutes a reasonable boundary (like a block separator)
+ * for text to be continued.
+ * If the last character of the source text constitutes a reasonable
+ * boundary, the whole text will be processed at once.
+ * If nowhere in the source text there exists
+ * such a reasonable boundary, the processed length will be zero.
+ * The caller should check for such an occurrence and do one of the following:
+ *
+ * In all cases, this option should be turned off before processing the last
+ * part of the text.UBIDI_OPTION_STREAMING.UBIDI_OPTION_STREAMING option is used,
+ * it is recommended to call ubidi_orderParagraphsLTR() with
+ * argument orderParagraphsLTR set to TRUE before
+ * calling ubidi_setPara so that later paragraphs may be
+ * concatenated to previous paragraphs on the right.UBiDi object.
+ * @param reorderingOptions is a combination of zero or more of the following
+ * options:
+ * #UBIDI_OPTION_DEFAULT, #UBIDI_OPTION_INSERT_MARKS,
+ * #UBIDI_OPTION_REMOVE_CONTROLS, #UBIDI_OPTION_STREAMING.
+ *
+ * @see ubidi_getReorderingOptions
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
+
+/**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return the current reordering options of the Bidi object
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+U_STABLE uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi);
+
+/**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * Unicode Standard Annex #9,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to the same value in such a case.
+ * @param pBiDi A pointer to a UBiDi object allocated with ubidi_open()
+ * which will be set to contain the reordering information,
+ * especially the resolved levels for all the characters in text.
+ *
+ * @param text is a pointer to the text that the Bidi algorithm will be performed on.
+ * This pointer is stored in the UBiDi object and can be retrieved
+ * with ubidi_getText().
+ * Note: the text must be (at least) length long.
+ *
+ * @param length is the length of the text; if length==-1 then
+ * the text must be zero-terminated.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the function shall determine the paragraph level from the text,
+ * then paraLevel can be set to
+ * either #UBIDI_DEFAULT_LTR
+ * or #UBIDI_DEFAULT_RTL; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and #UBIDI_MAX_EXPLICIT_LEVEL
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ * ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * #UBIDI_LEVEL_OVERRIDE bit set.
+ * Except for that bit, it must be
+ * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL,
+ * with one exception: a level of zero may be specified for a paragraph
+ * separator even if paraLevel>0 when multiple paragraphs
+ * are submitted in the same call to ubidi_setPara().
+ * Caution: A copy of this pointer, not of the levels,
+ * will be stored in the UBiDi object;
+ * the embeddingLevels array must not be
+ * deallocated before the UBiDi structure is destroyed or reused,
+ * and the embeddingLevels
+ * should not be modified to avoid unexpected results on subsequent Bidi operations.
+ * However, the ubidi_setPara() and
+ * ubidi_setLine() functions may modify some or all of the levels.
+ * After the UBiDi object is reused or destroyed, the caller
+ * must take care of the deallocation of the embeddingLevels array.
+ * Note: the embeddingLevels array must be
+ * at least length long.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+ UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+ UErrorCode *pErrorCode);
+
+/**
+ * ubidi_setLine() sets a UBiDi to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a UBiDi object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text. In the new line object, the indexes will range from 0 to limit-start-1.
+ * This is used after calling ubidi_setPara() for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.
+ * After line-breaking, rules (L1) and (L2) for the treatment of trailing WS and for reordering are performed on a UBiDi object that represents a line.
+ * Important: pLineBiDi shares data with pParaBiDi.
+ * You must destroy or reuse pLineBiDi before pParaBiDi.
+ * In other words, you must destroy or reuse the UBiDi object for a line before the object for its parent paragraph.
+ * The text pointer that was stored in pParaBiDi is also copied,
+ * and start is added to it so that it points to the beginning of the line for this object.
+ *
+ * @param pParaBiDi is the parent paragraph object. It must have been set
+ * by a successful call to ubidi_setPara.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ * (its last index +1).
+ * It must be 0<=start, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be UBiDi object.
+ *
+ * @return a value of UBIDI_LTR, UBIDI_RTL
+ * or UBIDI_MIXED
+ * that indicates if the entire text
+ * represented by this object is unidirectional,
+ * and which direction, or if it is mixed-directional.
+ *
+ * @see UBiDiDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi);
+
+/**
+ * Get the pointer to the text.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @return The pointer to the text that the UBiDi object was created for.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the text.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @return The length of the text that the UBiDi object was created for.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi);
+
+/**
+ * Get the paragraph level of the text.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
+ * is returned.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getParagraph
+ * @see ubidi_getParagraphByIndex
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi);
+
+/**
+ * Get the number of paragraphs.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @return The number of paragraphs.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi);
+
+/**
+ * Get a paragraph, given a position within the text.
+ * This function returns information about a paragraph.
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using ubidi_getParagraphByIndex().
+ * @param pBiDi is the paragraph or line UBiDi object.
+ * @param charIndex is the index of a character within the text, in the
+ * range [0..ubidi_getProcessedLength(pBiDi)-1].
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * charIndex.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of the paragraph containing the specified position.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
+ int32_t *pParaLimit, UBiDiLevel *pParaLevel,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This function returns information about a paragraph.
+ * @param pBiDi is the paragraph UBiDi object.
+ * @param paraIndex is the number of the paragraph, in the
+ * range [0..ubidi_countParagraphs(pBiDi)-1].
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+ int32_t *pParaStart, int32_t *pParaLimit,
+ UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
+
+/**
+ * Get the level for one character.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param charIndex the index of a character. It must be in the range
+ * [0..ubidi_getProcessedLength(pBiDi)].
+ *
+ * @return The level for the character at charIndex (0 if charIndex is not
+ * in the valid range).
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
+
+/**
+ * Get an array of levels for each character.
+ * Note that this function may allocate memory under some circumstances, unlike ubidi_getLevelAt().
+ * @param pBiDi is the paragraph or line UBiDi object, whose
+ * text length must be strictly positive.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The levels array for the text,
+ * or NULL if an error occurs.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical run.
+ * This function returns information about a run and is used
+ * to retrieve runs in logical order.
+ * @param pBiDi is the paragraph or line UBiDi object.
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @param pLogicalLimit will receive the limit of the corresponding run.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * logicalPosition.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pLevel will receive the level of the corresponding run.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+ int32_t *pLogicalLimit, UBiDiLevel *pLevel);
+
+/**
+ * Get the number of runs.
+ * This function may invoke the actual reordering on the
+ * UBiDi object, after ubidi_setPara()
+ * may have resolved only the levels of the text. Therefore,
+ * ubidi_countRuns() may have to allocate memory,
+ * and may fail doing so.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The number of runs.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get one run's logical start, length, and directionality,
+ * which can be 0 for LTR or 1 for RTL.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run. ubidi_countRuns() should be called
+ * before the runs are retrieved.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range [0..ubidi_countRuns(pBiDi)-1].
+ *
+ * @param pLogicalStart is the first logical character index in the text.
+ * The pointer may be NULL if this index is not needed.
+ *
+ * @param pLength is the number of characters (at least one) in the run.
+ * The pointer may be NULL if this is not needed.
+ *
+ * @return the directionality of the run,
+ * UBIDI_LTR==0 or UBIDI_RTL==1,
+ * never UBIDI_MIXED.
+ *
+ * @see ubidi_countRuns
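+ * Example:
+ * \code
+ * int32_t i, count=ubidi_countRuns(pBiDi), logicalStart, visualIndex=0, length;
+ * for(i=0; i<count; ++i) {
+ *   if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { do { show_char(text[logicalStart++], visualIndex++); } while(--length>0); }
+ *   else { logicalStart+=length; do { show_char(text[--logicalStart], visualIndex++); } while(--length>0); }
+ * }
+ * \endcode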
+ * Note that in right-to-left runs, code like this places
+ * modifier letters before base characters and second surrogates
+ * before first ones.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+ int32_t *pLogicalStart, int32_t *pLength);
+
+/**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * UBiDi object, then calling
+ * ubidi_getLogicalMap() is more efficient. The value returned may be #UBIDI_MAP_NOWHERE if there is no
+ * visual position because the corresponding text character is a Bidi control
+ * removed from output by the option #UBIDI_OPTION_REMOVE_CONTROLS.
+ * When the visual output is altered by using options of
+ * ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
+ * UBIDI_REMOVE_BIDI_CONTROLS, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The visual position of this character.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * UBiDi object, then calling
+ * ubidi_getVisualMap() is more efficient. The value returned may be #UBIDI_MAP_NOWHERE if there is no
+ * logical position because the corresponding text character is a Bidi mark
+ * inserted in the output by option #UBIDI_OPTION_INSERT_MARKS.
+ * This is the inverse function to ubidi_getVisualIndex().
+ * When the visual output is altered by using options of ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
+ * UBIDI_REMOVE_BIDI_CONTROLS, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of this character in the text.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical-to-visual index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * Some values in the map may be #UBIDI_MAP_NOWHERE if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option #UBIDI_OPTION_REMOVE_CONTROLS.
+ * When the visual output is altered by using options of ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
+ * UBIDI_REMOVE_BIDI_CONTROLS, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param indexMap is a pointer to an array of ubidi_getProcessedLength()
+ * indexes which will reflect the reordering of the characters.
+ * If option #UBIDI_OPTION_INSERT_MARKS is set, the number
+ * of elements allocated in indexMap must be no less than
+ * ubidi_getResultLength().
+ * The array does not need to be initialized.
+ * The index map will result in indexMap[logicalIndex]==visualIndex.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * Get a visual-to-logical index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * Some values in the map may be #UBIDI_MAP_NOWHERE if the
+ * corresponding text characters are Bidi marks inserted in the visual output
+ * by the option #UBIDI_OPTION_INSERT_MARKS.
+ * When the visual output is altered by using options of ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE,
+ * UBIDI_REMOVE_BIDI_CONTROLS, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS.
+ *
+ * @param pBiDi is the paragraph or line UBiDi object.
+ *
+ * @param indexMap is a pointer to an array of ubidi_getResultLength()
+ * indexes which will reflect the reordering of the characters.
+ * If option #UBIDI_OPTION_REMOVE_CONTROLS is set, the number
+ * of elements allocated in indexMap must be no less than
+ * ubidi_getProcessedLength().
+ * The array does not need to be initialized.
+ * The index map will result in indexMap[visualIndex]==logicalIndex.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using ubidi_getLogicalMap() on a
+ * UBiDi object.
+ *
+ * @param levels is an array with length levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be length>0.
+ *
+ * @param indexMap is a pointer to an array of length
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized. The index map will result in indexMap[logicalIndex]==visualIndex.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using ubidi_getVisualMap() on a
+ * UBiDi object.
+ *
+ * @param levels is an array with length levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be length>0.
+ *
+ * @param indexMap is a pointer to an array of length
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized. The index map will result in indexMap[visualIndex]==logicalIndex.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * Invert an index map.
+ * The index mapping of the first map is inverted and written to
+ * the second one.
+ *
+ * @param srcMap is an array with length elements
+ * which defines the original mapping from a source array containing
+ * length elements to a destination array.
+ * Some elements of the source array may have no mapping in the
+ * destination array. In that case, their value will be
+ * the special value UBIDI_MAP_NOWHERE.
+ * All elements must be >=0 or equal to UBIDI_MAP_NOWHERE.
+ * Some elements may have a value >= length, if the
+ * destination array has more elements than the source array.
+ * There must be no duplicate indexes (two or more elements with the
+ * same value except UBIDI_MAP_NOWHERE).
+ *
+ * @param destMap is an array with a number of elements equal to 1 + the highest
+ * value in srcMap.
+ * destMap will be filled with the inverse mapping.
+ * If element with index i in srcMap has a value k different
+ * from UBIDI_MAP_NOWHERE, this means that element i of
+ * the source array maps to element k in the destination array.
+ * The inverse map will have value i in its k-th element.
+ * For all elements of the destination array which do not map to
+ * an element in the source array, the corresponding element in the
+ * inverse map will have a value equal to UBIDI_MAP_NOWHERE.
+ *
+ * @param length is the length of each array.
+ * @see UBIDI_MAP_NOWHERE
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
+
+/** option flags for ubidi_writeReordered() */
+
+/**
+ * option bit for ubidi_writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_KEEP_BASE_COMBINING 1
+
+/**
+ * option bit for ubidi_writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_DO_MIRRORING 2
+
+/**
+ * option bit for ubidi_writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * ubidi_writeReordered()
+ * first without this option, and then calling
+ * ubidi_writeReverse() without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.ubidi_setPara(). This length may be different from the length
+ * of the source text if option #UBIDI_OPTION_STREAMING
+ * has been set.
+ *
+ * Note that whenever the length of the text affects the execution or the
+ * result of a function, it is the processed length which must be considered,
+ * except for ubidi_setPara (which receives unprocessed source
+ * text) and ubidi_getLength (which returns the original length
+ * of the source text).
+ * In particular, the processed length is the one to consider in the following
+ * cases:
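+ * - maximum value of the limit argument of ubidi_setLine
+ * - maximum value of the charIndex argument of ubidi_getParagraph
+ * - maximum value of the charIndex argument of ubidi_getLevelAt
+ * - number of elements in the array returned by ubidi_getLevels
+ * - maximum value of the logicalPosition argument of ubidi_getLogicalRun
+ * - maximum value of the logicalIndex argument of ubidi_getVisualIndex
+ * - number of elements filled in the *indexMap argument of ubidi_getLogicalMap
+ * - length of text processed by ubidi_writeReordered
+ *
+ * @param pBiDi is the paragraph UBiDi object.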
+ *
+ * @return The length of the part of the source text processed by
+ * the last call to ubidi_setPara.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_STREAMING
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the reordered text resulting from the last call to
+ * ubidi_setPara(). This length may be different from the length
+ * of the source text if option #UBIDI_OPTION_INSERT_MARKS
+ * or option #UBIDI_OPTION_REMOVE_CONTROLS has been set.
+ *
+ * This resulting length is the one to consider in the following cases:
+ *
+ * - maximum value of the visualIndex argument of ubidi_getLogicalIndex
+ * - number of elements of the *indexMap argument of ubidi_getVisualMap
+ *
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * ubidi_writeReordered, or if option
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L has been set.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @return The length of the reordered text resulting from
+ * the last call to ubidi_setPara.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_INSERT_MARKS
+ * @see UBIDI_OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi);
+
+U_CDECL_BEGIN
+/**
+ * value returned by UBiDiClassCallback callbacks when
+ * there is no need to override the standard Bidi class for a given code point.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT
+
+/**
+ * Callback type declaration for overriding default Bidi class values with
+ * custom ones.
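+ * Usually, the function pointer will be propagated to a UBiDi object by calling the ubidi_setClassCallback() function;
+ * then the callback will be invoked by the UBA implementation any time the class of a character is to be determined.
+ * @param context is a pointer to the callback private data.
+ * @param c is the code point to get a Bidi class for.
+ * @return The directional property / Bidi class for the given code point c if the default class has been overridden, or
+ * #U_BIDI_CLASS_DEFAULT if the standard Bidi class value for c is to be used.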
+ * @see ubidi_setClassCallback
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+typedef UCharDirection U_CALLCONV
+UBiDiClassCallback(const void *context, UChar32 c);
+
+U_CDECL_END
+
+/**
+ * Retrieve the Bidi class for a given code point.
+ * If a #UBiDiClassCallback callback is defined and returns a
+ * value other than #U_BIDI_CLASS_DEFAULT, that value is used;
+ * otherwise the default class determination mechanism is invoked.
+ * @param pBiDi is the paragraph UBiDi object.
+ * @param c is the code point whose Bidi class must be retrieved.
+ *
+ * @return The Bidi class for character c based
+ * on the given pBiDi instance.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
+
+/**
+ * Set the callback function and callback data used by the UBA
+ * implementation for Bidi class determination.
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @param newFn is the new callback function pointer.
+ *
+ * @param newContext is the new callback context pointer. This can be NULL.
+ *
+ * @param oldFn fillin: Returns the old callback function pointer. This can be
+ * NULL.
+ *
+ * @param oldContext fillin: Returns the old callback's context. This can be
+ * NULL.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+ const void *newContext, UBiDiClassCallback **oldFn,
+ const void **oldContext, UErrorCode *pErrorCode);
+
+/**
+ * Get the current callback function used for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @param fn fillin: Returns the callback function pointer.
+ *
+ * @param context fillin: Returns the callback's private context.
+ *
+ * @see ubidi_setClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
+
+/**
+ * Take a UBiDi object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * ubidi_setPara() or for a line of text set by
+ * ubidi_setLine() and write a reordered string to the
+ * destination buffer.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) modifier letters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the description of the destSize
+ * and options parameters and of the option bit flags.
+ *
+ * @param pBiDi A pointer to a UBiDi object that
+ * is set by ubidi_setPara() or
+ * ubidi_setLine() and contains the reordering
+ * information for the text that it was defined for,
+ * as well as a pointer to that text.
+ * The text was aliased (only the pointer was stored
+ * without copying the contents) and must not have been modified
+ * since the ubidi_setPara() call.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * The source text and dest[destSize]
+ * must not overlap.
+ *
+ * @param destSize The size of the dest buffer,
+ * in number of UChars.
+ * If the UBIDI_INSERT_LRM_FOR_NUMERIC
+ * option is set, then the destination length could be
+ * as large as
+ * ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi).
+ * If the UBIDI_REMOVE_BIDI_CONTROLS option
+ * is set, then the destination length may be less than
+ * ubidi_getLength(pBiDi).
+ * If none of these options is set, then the destination length
+ * will be exactly ubidi_getProcessedLength(pBiDi).
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * #UBIDI_DO_MIRRORING,
+ * #UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * #UBIDI_KEEP_BASE_COMBINING,
+ * #UBIDI_OUTPUT_REVERSE,
+ * #UBIDI_REMOVE_BIDI_CONTROLS
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) modifier letters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This function is the implementation for reversing RTL runs as part
+ * of ubidi_writeReordered(). For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed srcLength.
+ *
+ * @see ubidi_writeReordered
+ *
+ * @param src A pointer to the RTL run text.
+ *
+ * @param srcLength The length of the RTL run.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * src[srcLength] and dest[destSize]
+ * must not overlap.
+ *
+ * @param destSize The size of the dest buffer,
+ * in number of UChars.
+ * If the UBIDI_REMOVE_BIDI_CONTROLS option
+ * is set, then the destination length may be less than
+ * srcLength.
+ * If this option is not set, then the destination length
+ * will be exactly srcLength.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * See the options parameter in ubidi_writeReordered().
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/*#define BIDI_SAMPLE_CODE*/
+/*@}*/
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ubrk.h b/jni/EastAsianWidth/unicode/ubrk.h
new file mode 100644
index 0000000..d57ba37
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ubrk.h
@@ -0,0 +1,482 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2007, International Business Machines Corporation and others.
+* All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+#include "unicode/utext.h"
+
+/**
+ * A text-break iterator.
+ * For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ /**
+ * Opaque type representing an ICU Break iterator object.
+ * @stable ICU 2.0
+ */
+ typedef void UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * BreakIterator C API
+ *
+ * The BreakIterator C API defines methods for finding the location
+ * of boundaries in text. Pointers to a UBreakIterator maintain a
+ * current position and scan over text, returning the index of characters
+ * where boundaries occur.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be 0<=code<=0x10ffff.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength ==sizeof(buffer)
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for U_SUCCESS() after u_charName()
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the ISO 10646 comment for a character.
+ * The ISO 10646 comment is an informative field in the Unicode Character
+ * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ * It must be 0<=c<=0x10ffff.
+ * @param dest Destination address for copying the comment.
+ * The comment will be zero-terminated if possible.
+ * If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity ==sizeof(dest)
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for U_SUCCESS() after u_getISOComment()
+ * returns.
+ * @return The length of the comment, or 0 if there is no comment for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated comment and the returned length indicates the full
+ * length of the comment.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+ char *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, pErrorCode
+ * is set to U_INVALID_CHAR_FOUND.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns FALSE, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return TRUE if the enumeration should continue, FALSE to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_STABLE void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
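+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT or
+ * UCHAR_INT_START<=which<UCHAR_INT_LIMIT or
+ * UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ *
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range 2<=radix<=36 or if the
+ * value of ch is not a valid digit in the specified
+ * radix, -1 is returned. A character is a valid digit
+ * if at least one of the following is true:
+ *
+ * - The character has a decimal digit value.
+ *   In this case the value is the character's decimal digit value.
+ * - The character is one of the uppercase Latin letters
+ *   'A' through 'Z'.
+ *   In this case the value is ch-'A'+10.
+ * - The character is one of the lowercase Latin letters
+ *   'a' through 'z'.
+ *   In this case the value is ch-'a'+10.
+ *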
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of radix is not a
+ * valid radix, or the value of digit is not a valid
+ * digit in the specified radix, the null character
+ * (U+0000) is returned.
+ *
+ * The radix argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The digit argument is valid if 0 <= digit < radix.
+ *
+ * If the digit is less than 10, then
+ * '0' + digit is returned. Otherwise, the value
+ * 'a' + digit - 10 is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the char representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be 0<=c<=0x10ffff.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity ==sizeof(dest)
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated string and the returned length indicates the full
+ * length of the string.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/jni/EastAsianWidth/unicode/uchriter.h b/jni/EastAsianWidth/unicode/uchriter.h
new file mode 100644
index 0000000..6d5a990
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uchriter.h
@@ -0,0 +1,381 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef UCHRITER_H
+#define UCHRITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/chariter.h"
+
+/**
+ * \file
+ * \brief C++ API: UChar Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UChar array.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UChar array, but also to
+ * create one that iterates over only a subrange of a UChar array
+ * (iterators over different subranges of the same UChar array don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
+public:
+ /**
+ * Create an iterator over the UChar array referred to by "textPtr".
+ * The iteration range is 0 to length-1.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * @param textPtr The UChar array to be iterated over
+ * @param length The length of the UChar array
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UChar* textPtr, int32_t length);
+
+ /**
+ * Create an iterator over the UChar array referred to by "textPtr".
+ * The iteration range is 0 to length-1.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The UChar array to be iterated over
+ * @param length The length of the UChar array
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UChar* textPtr, int32_t length,
+ int32_t position);
+
+ /**
+ * Create an iterator over the UChar array referred to by "textPtr".
+ * The iteration range is textBegin to textEnd-1.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If textBegin and textEnd do not
+ * form a valid iteration range or "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The UChar array to be iterated over
+ * @param length The length of the UChar array
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UChar* textPtr, int32_t length,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t position);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * @param that The UCharCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UCharCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UCharCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the same
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied
+ * @return a reference to this iterator, after the assignment
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator&
+ operator=(const UCharCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Returns a new UCharCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the CharacterIterator newly created
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone(void) const;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar first(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual UChar firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar last(void);
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void);
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the position within the text-storage object
+ * @return the code unit
+ * @stable ICU 2.0
+ */
+ virtual UChar setIndex(int32_t position);
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the position within the text-storage object
+ * @return the code point
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position);
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the code unit the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual UChar current(void) const;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the code point the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const;
+
+ /**
+ * Advances to the next code unit in the iteration range (toward
+ * endIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the next code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar next(void);
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar nextPostInc(void);
+
+ /**
+ * Advances to the next code point in the iteration range (toward
+ * endIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void);
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void);
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext();
+
+ /**
+ * Advances to the previous code unit in the iteration range (toward
+ * startIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the previous code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar previous(void);
+
+ /**
+ * Advances to the previous code point in the iteration range (toward
+ * startIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * @return the previous code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void);
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * before the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious();
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin);
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move32(int32_t delta, EOrigin origin);
+
+ /**
+ * Sets the iterator to iterate over a new range of text
+ * @stable ICU 2.0
+ */
+ void setText(const UChar* newText, int32_t newTextLength);
+
+ /**
+ * Copies the UChar array under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result);
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+protected:
+ /**
+ * Protected constructor
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator();
+ /**
+ * Protected member text
+ * @stable ICU 2.0
+ */
+ const UChar* text;
+
+};
+
+U_NAMESPACE_END
+#endif
diff --git a/jni/EastAsianWidth/unicode/uclean.h b/jni/EastAsianWidth/unicode/uclean.h
new file mode 100644
index 0000000..a13924a
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uclean.h
@@ -0,0 +1,267 @@
+/*
+******************************************************************************
+* *
+* Copyright (C) 2001-2005, International Business Machines *
+* Corporation and others. All Rights Reserved. *
+* *
+******************************************************************************
+* file name: uclean.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+
+/**
+ * Initialize ICU. The description further below applies to ICU 2.6 to ICU 3.4.
+ * Starting with ICU 3.4, u_init() need not be called any more for
+ * ensuring thread safety, but it can give an indication of whether ICU
+ * can load its data. In ICU 3.4, it will try to load the converter alias table
+ * (cnvalias.icu) and give an error code if that fails.
+ * This may change in the future.
+ * If u_setMemoryFunctions() or
+ * u_setMutexFunctions() are needed (uncommon), they must be
+ * called _before_ u_init().
+ * @param status An ICU UErrorCode parameter. It must not be NULL.
+ * An Error will be returned if some required part of ICU data can not
+ * be loaded or initialized.
+ * The function returns immediately if the input error code indicates a
+ * failure, as usual.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+u_init(UErrorCode *status);
+
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * Character Conversion C API
+ *
+ * When NULL is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * Options follow the converter name, with a UCNV_OPTION_SEP_CHAR between the name and the first option and
+ * also between adjacent options. Package converters are located as by udata_open( packageName, "cnv", converterName, err) or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated. Example: cnv = ucnv_openPackage("myapp", "myconverter", &err);
+ * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointing at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a stateful conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converter's internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * U_BUFFER_OVERFLOW_ERROR, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the target buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as target. Will fill in offsets from target to source pointer
+ * e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6]
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to TRUE if the current source buffer is the last available
+ * chunk of the source, FALSE otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to TRUE until
+ * the source buffer is consumed.
+ * @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the
+ * converter is NULL.
+ * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+ UChar **target,
+ const UChar *targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+ char *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+ UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ *
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ *
+ *
+ * @param err fills in error status; U_INDEX_OUTOFBOUNDS_ERROR will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a U_BUFFER_OVERFLOW_ERROR because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ * (if reset==TRUE, see parameters;
+ * also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - the pivot buffer can be provided internally;
+ * possible only for whole-string conversion, not streaming conversion;
+ * in this case, the caller will not be able to get details about where an
+ * error occurred
+ * (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ * const char *s, int32_t length,
+ * char *u8, int32_t capacity,
+ * UErrorCode *pErrorCode) {
+ * UConverter *utf8Cnv;
+ * char *target;
+ *
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * if(length<0) {
+ * length=strlen(s);
+ * }
+ * target=u8;
+ * ucnv_convertEx(cnv, utf8Cnv,
+ * &target, u8+capacity,
+ * &s, s+length,
+ * NULL, NULL, NULL, NULL,
+ * TRUE, TRUE,
+ * pErrorCode);
+ *
+ * myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ * // return the output string length, but without preflighting
+ * return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv Output converter, used to convert from the UTF-16 pivot
+ * to the target using ucnv_fromUnicode().
+ * @param sourceCnv Input converter, used to convert from the source to
+ * the UTF-16 pivot using ucnv_toUnicode().
+ * @param target I/O parameter, same as for ucnv_fromUChars().
+ * Input: *target points to the beginning of the target buffer.
+ * Output: *target points to the first unit after the last char written.
+ * @param targetLimit Pointer to the first unit after the target buffer.
+ * @param source I/O parameter, same as for ucnv_toUChars().
+ * Input: *source points to the beginning of the source buffer.
+ * Output: *source points to the first unit after the last char read.
+ * @param sourceLimit Pointer to the first unit after the source buffer.
+ * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ * then an internal buffer is used and the other pivot
+ * arguments are ignored and can be NULL as well.
+ * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
+ * conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
+ * conversion from the source buffer to the pivot buffer.
+ * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ * and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param aliases an array of ucnv_countAliases() string-pointers (const char *) that will be filled in.
+ * The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * ucnv_getStandardName, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * uenum_close when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ * by a standard. If any of the parameters are invalid, NULL
+ * is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated with converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ *
+ * Example alias table: conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ *
+ * Result of ucnv_getStandardName("conv", "STANDARD1") for that table: "alias2"
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ * if a standard converter name cannot be determined,
+ * then NULL is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ *
+ * Example alias table: conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ *
+ * Result of ucnv_getCanonicalName("alias1", "STANDARD1") for that table: "conv"
+ *
+ * @return returns the canonical converter name;
+ * if a standard or alias name cannot be determined,
+ * then NULL is returned. The returned string is
+ * owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster to pass a NULL argument to ucnv_open to open the
+ * default converter.
+ *
+ * @return returns the current default converter name.
+ * Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * are sufficient for your application.
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+
+/**
+ * Fixes the backslash character mismapping. For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS. This function will take the input
+ * buffer and replace all the yen sign characters with backslash. This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required. You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return TRUE if the converter contains ambiguous mapping of the same
+ * character, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
+ * mapping, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return TRUE if the converter uses fallback, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * Callback API for UConverter
+ *
+ * These functions are provided here for the convenience of the callback
+ * writer. If you are just looking for callback functions to use, please
+ * see ucnv_err.h. DO NOT call these functions directly when you are
+ * working with converters, unless your code has been called as a callback
+ * via ucnv_setFromUCallBack or ucnv_setToUCallBack !!
+ *
+ * A note about error codes and overflow. Unlike other ICU functions,
+ * these functions do not expect the error status to be U_ZERO_ERROR.
+ * Callbacks must be much more careful about their error codes.
+ * The error codes used here are in/out parameters, which should be passed
+ * back in the callback's error parameter.
+ *
+ * For example, if you call ucnv_cbFromUWriteBytes to write data out
+ * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if
+ * the data did not fit in the target. But this isn't a failing error,
+ * in fact, ucnv_cbFromUWriteBytes may be called AGAIN with the error
+ * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
+ * which will also go into the internal overflow buffers.
+ *
+ * Concerning offsets, the 'offset' parameters here are relative to the start
+ * of SOURCE. For example, suppose the string "ABCD" was being converted
+ * from Unicode into a codepage which doesn't have a mapping for 'B'.
+ * 'A' will be written out correctly, but
+ * The FromU Callback will be called on an unassigned character for 'B'.
+ * At this point, this is the state of the world:
+ * Target: A [..] [points after A]
+ * Source: A B [C] D [points to C - B has been consumed]
+ * 0 1 2 3
+ * codePoint = "B" [the unassigned codepoint]
+ *
+ * Now, suppose a callback wants to write the substitution character '?' to
+ * the target. It calls ucnv_cbFromUWriteBytes() to write the ?.
+ * It should pass ZERO as the offset, because the offset as far as the
+ * callback is concerned is relative to the SOURCE pointer [which points
+ * before 'C'.] If the callback goes into the args and consumes 'C' also,
+ * it would call FromUWriteBytes with an offset of 1 (and advance the source
+ * pointer).
+ *
+ */
+
+#ifndef UCNV_CB_H
+#define UCNV_CB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+
+/**
+ * ONLY used by FromU callback functions.
+ * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source source bytes to write
+ * @param length length of bytes to write
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status. If U_BUFFER_OVERFLOW_ERROR is returned, then U_BUFFER_OVERFLOW_ERROR must
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+ const char* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.
+ * This function will write out the correct substitution character sequence
+ * to the target.
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from the current source pointer to be used
+ * @param err error status. If U_BUFFER_OVERFLOW_ERROR is returned, then U_BUFFER_OVERFLOW_ERROR must
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteBytes
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.
+ * This function will write out the error character(s) to the target UChar buffer.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
+ * @param sourceLimit pointer after last UChar to write
+ * @param offsetIndex the relative offset index from callback which will be set
+ * @param err error status U_BUFFER_OVERFLOW_ERROR
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+ const UChar** source,
+ const UChar* sourceLimit,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the specified characters to the target
+ * UChar buffer.
+ *
+ * @param args callback toUnicode arguments
+ * @param source source string to write
+ * @param length the length of source string
+ * @param offsetIndex the relative offset index which will be written.
+ * @param err error status U_BUFFER_OVERFLOW_ERROR
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+ const UChar* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the Unicode substitution character (U+FFFD).
+ *
+ * @param args callback toUnicode arguments
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status U_BUFFER_OVERFLOW_ERROR
+ * @see ucnv_cbToUWriteUChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+#endif
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ucnv_err.h b/jni/EastAsianWidth/unicode/ucnv_err.h
new file mode 100644
index 0000000..b0db7bc
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ucnv_err.h
@@ -0,0 +1,456 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ *
+ *
+ * ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ * Error Behaviour Functions
+ * Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ * These are provided as part of ICU and many are stable, but they
+ * can also be considered only as an example of what can be done with
+ * callbacks. You may of course write your own.
+ *
+ * If you want to write your own callbacks, you may find the functions
+ * from ucnv_cb.h useful.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the ucnv_setFromUCallBack
+ * and ucnv_setToUCallBack functions, to set the behaviour of a converter
+ * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ * usage example: 'STOP' doesn't need any context, but newContext
+ * could be set to something other than 'NULL' if needed. The available
+ * contexts in this header can modify the default behavior of the callback.
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setFromUCallBack(myConverter,
+ * UCNV_FROM_U_CALLBACK_STOP,
+ * NULL,
+ * &oldAction,
+ * &oldContext,
+ * &err);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
+ * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ * and ucnv_setToUCallBack would need to be called in order to change
+ * that behavior too.
+ *
+ * Here is an example with a context:
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterToUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setToUCallBack(myConverter,
+ * UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ * UCNV_SUB_STOP_ON_ILLEGAL,
+ * &oldAction,
+ * &oldContext,
+ * &err);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
+ * Codepage -> Unicode. Any unmapped but legal characters will be
+ * replaced by the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE "U"
+
+/**
+ * The process condition code to be used with the callbacks.
+ * Codes which are greater than UCNV_IRREGULAR should be
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
+ \\x81\\x2E is illegal in SJIS because \\x2E
+ is not a valid trail byte for the \\x81
+ lead byte.
+ Also, starting with Unicode 3.0.1, non-shortest byte sequences
+ in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+ are also illegal, not just irregular.
+ The error code U_ILLEGAL_CHAR_FOUND will be set. */
+ UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
+ the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+ are irregular UTF-8 byte sequences for single surrogate
+ code points.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_RESET = 3, /**< The callback is called with this reason when a
+ 'reset' has occurred. Callback should reset all
+ state. */
+ UCNV_CLOSE = 4, /**< Called when the converter is closed. The
+ callback should release any allocated memory.*/
+ UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
+ converter. The pointer available as the
+ 'context' is an alias to the original converter's
+ context pointer. If the context must be owned
+ by the new converter, the callback must clone
+ the data and call ucnv_setFromUCallBack
+ (or ucnv_setToUCallBack) with the correct pointer.
+ @stable ICU 2.2
+ */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
+ const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
+ const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
+ * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/
diff --git a/jni/EastAsianWidth/unicode/uconfig.h b/jni/EastAsianWidth/unicode/uconfig.h
new file mode 100644
index 0000000..d67b609
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uconfig.h
@@ -0,0 +1,215 @@
+/*
+**********************************************************************
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: uconfig.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+/*!
+ * \file
+ * \brief Switches for excluding parts of ICU library code modules.
+ *
+ * Allows building partial, smaller libraries for special purposes.
+ * By default, all modules are built (this copy defaults UCONFIG_NO_TRANSLITERATION and UCONFIG_NO_SERVICE to 1).
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+# define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+ /* common library */
+# define UCONFIG_NO_BREAK_ITERATION 1
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_NO_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_FORMATTING 1
+# define UCONFIG_NO_TRANSLITERATION 1
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+# define UCONFIG_NO_FILE_IO 0
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build with this switch turned on.
+ * This switch turns off all converters.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+# define UCONFIG_NO_NORMALIZATION 0
+#elif UCONFIG_NO_NORMALIZATION
+ /* common library */
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_ONLY_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_COLLATION 1
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+# define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+# define UCONFIG_NO_IDNA 0
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+# define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+# define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+# define UCONFIG_NO_SERVICE 1
+#endif
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/udata.h b/jni/EastAsianWidth/unicode/udata.h
new file mode 100644
index 0000000..95348fb
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/udata.h
@@ -0,0 +1,389 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: udata.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * Information about data loading interface
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * NOTE(review): this comment appears to have lost text (and probably markup)
+ * in transit; the paragraphs below are rebuilt from the surviving fragments.
+ * The UDataInfo and UDataMemory declarations are also not visible near this
+ * comment -- confirm against the pristine ICU udata.h.
+ *
+ * The structure has a size field (see udata_getInfo()).
+ *
+ * The udata_open[Choice]() functions may reject data based on the
+ * value in isBigEndian.
+ * No other field is used by the udata API implementation.
+ *
+ * The dataFormat field may be used to identify
+ * the kind of data, e.g. a converter table.
+ *
+ * The formatVersion field should be used to
+ * make sure that the format can be interpreted.
+ * It may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.
+ *
+ * The dataVersion field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.
+ *
+ * UDataMemoryIsAcceptable is the callback type used by udata_openChoice().
+ * @param context The context parameter passed into udata_openChoice().
+ * @param type The type of the data as passed into udata_openChoice().
+ * It may be NULL.
+ * @param name The name of the data as passed into udata_openChoice().
+ * @param pInfo A pointer to the UDataInfo structure
+ * of data that has been loaded and will be returned
+ * by udata_openChoice() if this function
+ * returns TRUE.
+ * @return TRUE if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+ const char *type, const char *name,
+ const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as udata_openChoice
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * NULL for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be NULL or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL.
+ * @return A pointer (handle) to a data memory object, or NULL
+ * if an error occurs. Call udata_getMemory()
+ * to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and efficiently load data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that allows specifying a data
+ * type and name to find and load the data.
+ *
+ * NOTE(review): part of this comment appears to have been lost in transit;
+ * the sentence that introduced the UDataInfo structure is incomplete.
+ * Confirm against the pristine ICU udata.h.
+ *
+ * The data memory object contains a UDataInfo structure (see udata_getInfo()).
+ * The caller's isAcceptable() function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.
+ *
+ * If path==NULL, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * NULL for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be NULL or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ * is useful for the client code. If it returns FALSE
+ * for all data items, then udata_openChoice()
+ * will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL.
+ * @return A pointer (handle) to a data memory object, or NULL
+ * if an error occurs. Call udata_getMemory()
+ * to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This allows to get access to the header containing
+ * platform data properties etc. which is not part of
+ * the data itself and can therefore not be accessed
+ * via the pointer that udata_getMemory() returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ * its size field must be set correctly,
+ * typically to sizeof(UDataInfo).
+ *
+ * *pInfo will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * pInfo->size, then the size will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
+ * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ * UErrorCode status = U_ZERO_ERROR;
+ *
+ * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * Warning: ICU must NOT have even attempted to access its data yet
+ * when this call is made, or U_USING_DEFAULT_WARNING code will
+ * be returned. Be careful of UnicodeStrings in static initialization which
+ * may attempt to load a converter (use the UNICODE_STRING(x) macro instead).
+ *
+ * Also note that it is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * This function has no effect on application (non ICU) data. See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force it to come out of a user-specified
+ * pointer.
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
+ * data with the specified path has already been opened, or
+ * if setAppData with the same path has already been called.
+ * Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ * to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess()
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+ /** ICU does not access the file system for data loading. */
+ UDATA_NO_FILES,
+ /** ICU only loads data from packages, not from single files. */
+ UDATA_ONLY_PACKAGES,
+ /** ICU loads data from packages first, and only from single files
+ if the data cannot be found in a package. */
+ UDATA_PACKAGES_FIRST,
+ /** ICU looks for data in single files first, then in packages. (default) */
+ UDATA_FILES_FIRST,
+ /** An alias for the default access mode. */
+ UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+ UDATA_FILE_ACCESS_COUNT
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with ures/ResourceBundle or
+ * udata APIs. It should be called before u_init. This function is not multithread safe.
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/udeprctd.h b/jni/EastAsianWidth/unicode/udeprctd.h
new file mode 100644
index 0000000..158967e
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/udeprctd.h
@@ -0,0 +1,50 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name:
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+* Contains data for commenting out APIs.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UDEPRCTD_H
+#define UDEPRCTD_H
+/* If U_HIDE_DEPRECATED_API is defined, each deprecated API name below is #defined to a *_DEPRECATED_API_DO_NOT_USE identifier (presumably so that any remaining use fails at build time). */
+#ifdef U_HIDE_DEPRECATED_API
+
+# if U_DISABLE_RENAMING /* nonzero: remap the plain API names */
+# define ucol_getContractions ucol_getContractions_DEPRECATED_API_DO_NOT_USE
+# define ucol_getLocale ucol_getLocale_DEPRECATED_API_DO_NOT_USE
+# define ures_countArrayItems ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
+# define ures_getLocale ures_getLocale_DEPRECATED_API_DO_NOT_USE
+# define ures_getVersionNumber ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
+# define utrans_getAvailableID utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
+# define utrans_getID utrans_getID_DEPRECATED_API_DO_NOT_USE
+# define utrans_open utrans_open_DEPRECATED_API_DO_NOT_USE
+# define utrans_unregister utrans_unregister_DEPRECATED_API_DO_NOT_USE
+# else /* otherwise: remap the names carrying the "_3_8" suffix (presumably the ICU 3.8 renamed symbols) */
+# define ucol_getContractions_3_8 ucol_getContractions_DEPRECATED_API_DO_NOT_USE
+# define ucol_getLocale_3_8 ucol_getLocale_DEPRECATED_API_DO_NOT_USE
+# define ures_countArrayItems_3_8 ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
+# define ures_getLocale_3_8 ures_getLocale_DEPRECATED_API_DO_NOT_USE
+# define ures_getVersionNumber_3_8 ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
+# define utrans_getAvailableID_3_8 utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
+# define utrans_getID_3_8 utrans_getID_DEPRECATED_API_DO_NOT_USE
+# define utrans_open_3_8 utrans_open_DEPRECATED_API_DO_NOT_USE
+# define utrans_unregister_3_8 utrans_unregister_DEPRECATED_API_DO_NOT_USE
+# endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* UDEPRCTD_H */
+
diff --git a/jni/EastAsianWidth/unicode/udraft.h b/jni/EastAsianWidth/unicode/udraft.h
new file mode 100644
index 0000000..f869b24
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/udraft.h
@@ -0,0 +1,186 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name:
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+* Contains data for commenting out APIs.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UDRAFT_H
+#define UDRAFT_H
+/* If U_HIDE_DRAFT_API is defined, each draft API name below is #defined to a *_DRAFT_API_DO_NOT_USE identifier (presumably so that any remaining use fails at build time). */
+#ifdef U_HIDE_DRAFT_API
+
+# if U_DISABLE_RENAMING /* nonzero: remap the plain API names */
+# define u_fclose u_fclose_DRAFT_API_DO_NOT_USE
+# define u_feof u_feof_DRAFT_API_DO_NOT_USE
+# define u_fflush u_fflush_DRAFT_API_DO_NOT_USE
+# define u_fgetConverter u_fgetConverter_DRAFT_API_DO_NOT_USE
+# define u_fgetc u_fgetc_DRAFT_API_DO_NOT_USE
+# define u_fgetcodepage u_fgetcodepage_DRAFT_API_DO_NOT_USE
+# define u_fgetcx u_fgetcx_DRAFT_API_DO_NOT_USE
+# define u_fgetfile u_fgetfile_DRAFT_API_DO_NOT_USE
+# define u_fgetlocale u_fgetlocale_DRAFT_API_DO_NOT_USE
+# define u_fgets u_fgets_DRAFT_API_DO_NOT_USE
+# define u_file_read u_file_read_DRAFT_API_DO_NOT_USE
+# define u_file_write u_file_write_DRAFT_API_DO_NOT_USE
+# define u_finit u_finit_DRAFT_API_DO_NOT_USE
+# define u_fopen u_fopen_DRAFT_API_DO_NOT_USE
+# define u_fprintf u_fprintf_DRAFT_API_DO_NOT_USE
+# define u_fprintf_u u_fprintf_u_DRAFT_API_DO_NOT_USE
+# define u_fputc u_fputc_DRAFT_API_DO_NOT_USE
+# define u_fputs u_fputs_DRAFT_API_DO_NOT_USE
+# define u_frewind u_frewind_DRAFT_API_DO_NOT_USE
+# define u_fscanf u_fscanf_DRAFT_API_DO_NOT_USE
+# define u_fscanf_u u_fscanf_u_DRAFT_API_DO_NOT_USE
+# define u_fsetcodepage u_fsetcodepage_DRAFT_API_DO_NOT_USE
+# define u_fsetlocale u_fsetlocale_DRAFT_API_DO_NOT_USE
+# define u_fsettransliterator u_fsettransliterator_DRAFT_API_DO_NOT_USE
+# define u_fstropen u_fstropen_DRAFT_API_DO_NOT_USE
+# define u_fungetc u_fungetc_DRAFT_API_DO_NOT_USE
+# define u_snprintf u_snprintf_DRAFT_API_DO_NOT_USE
+# define u_snprintf_u u_snprintf_u_DRAFT_API_DO_NOT_USE
+# define u_sprintf u_sprintf_DRAFT_API_DO_NOT_USE
+# define u_sprintf_u u_sprintf_u_DRAFT_API_DO_NOT_USE
+# define u_sscanf u_sscanf_DRAFT_API_DO_NOT_USE
+# define u_sscanf_u u_sscanf_u_DRAFT_API_DO_NOT_USE
+# define u_vfprintf u_vfprintf_DRAFT_API_DO_NOT_USE
+# define u_vfprintf_u u_vfprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vfscanf u_vfscanf_DRAFT_API_DO_NOT_USE
+# define u_vfscanf_u u_vfscanf_u_DRAFT_API_DO_NOT_USE
+# define u_vsnprintf u_vsnprintf_DRAFT_API_DO_NOT_USE
+# define u_vsnprintf_u u_vsnprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vsprintf u_vsprintf_DRAFT_API_DO_NOT_USE
+# define u_vsprintf_u u_vsprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vsscanf u_vsscanf_DRAFT_API_DO_NOT_USE
+# define u_vsscanf_u u_vsscanf_u_DRAFT_API_DO_NOT_USE
+# define ucal_getTZDataVersion ucal_getTZDataVersion_DRAFT_API_DO_NOT_USE
+# define ucasemap_getBreakIterator ucasemap_getBreakIterator_DRAFT_API_DO_NOT_USE
+# define ucasemap_setBreakIterator ucasemap_setBreakIterator_DRAFT_API_DO_NOT_USE
+# define ucasemap_toTitle ucasemap_toTitle_DRAFT_API_DO_NOT_USE
+# define ucasemap_utf8FoldCase ucasemap_utf8FoldCase_DRAFT_API_DO_NOT_USE
+# define ucasemap_utf8ToTitle ucasemap_utf8ToTitle_DRAFT_API_DO_NOT_USE
+# define udatpg_addPattern udatpg_addPattern_DRAFT_API_DO_NOT_USE
+# define udatpg_clone udatpg_clone_DRAFT_API_DO_NOT_USE
+# define udatpg_close udatpg_close_DRAFT_API_DO_NOT_USE
+# define udatpg_getAppendItemFormat udatpg_getAppendItemFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_getAppendItemName udatpg_getAppendItemName_DRAFT_API_DO_NOT_USE
+# define udatpg_getBaseSkeleton udatpg_getBaseSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_getBestPattern udatpg_getBestPattern_DRAFT_API_DO_NOT_USE
+# define udatpg_getDateTimeFormat udatpg_getDateTimeFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_getDecimal udatpg_getDecimal_DRAFT_API_DO_NOT_USE
+# define udatpg_getPatternForSkeleton udatpg_getPatternForSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_getSkeleton udatpg_getSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_open udatpg_open_DRAFT_API_DO_NOT_USE
+# define udatpg_openBaseSkeletons udatpg_openBaseSkeletons_DRAFT_API_DO_NOT_USE
+# define udatpg_openEmpty udatpg_openEmpty_DRAFT_API_DO_NOT_USE
+# define udatpg_openSkeletons udatpg_openSkeletons_DRAFT_API_DO_NOT_USE
+# define udatpg_replaceFieldTypes udatpg_replaceFieldTypes_DRAFT_API_DO_NOT_USE
+# define udatpg_setAppendItemFormat udatpg_setAppendItemFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_setAppendItemName udatpg_setAppendItemName_DRAFT_API_DO_NOT_USE
+# define udatpg_setDateTimeFormat udatpg_setDateTimeFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_setDecimal udatpg_setDecimal_DRAFT_API_DO_NOT_USE
+# define uloc_getLocaleForLCID uloc_getLocaleForLCID_DRAFT_API_DO_NOT_USE
+# define uset_clone uset_clone_DRAFT_API_DO_NOT_USE
+# define uset_cloneAsThawed uset_cloneAsThawed_DRAFT_API_DO_NOT_USE
+# define uset_freeze uset_freeze_DRAFT_API_DO_NOT_USE
+# define uset_isFrozen uset_isFrozen_DRAFT_API_DO_NOT_USE
+# define uset_span uset_span_DRAFT_API_DO_NOT_USE
+# define uset_spanBack uset_spanBack_DRAFT_API_DO_NOT_USE
+# define uset_spanBackUTF8 uset_spanBackUTF8_DRAFT_API_DO_NOT_USE
+# define uset_spanUTF8 uset_spanUTF8_DRAFT_API_DO_NOT_USE
+# else /* otherwise: remap the names carrying the "_3_8" suffix (presumably the ICU 3.8 renamed symbols) */
+# define u_fclose_3_8 u_fclose_DRAFT_API_DO_NOT_USE
+# define u_feof_3_8 u_feof_DRAFT_API_DO_NOT_USE
+# define u_fflush_3_8 u_fflush_DRAFT_API_DO_NOT_USE
+# define u_fgetConverter_3_8 u_fgetConverter_DRAFT_API_DO_NOT_USE
+# define u_fgetc_3_8 u_fgetc_DRAFT_API_DO_NOT_USE
+# define u_fgetcodepage_3_8 u_fgetcodepage_DRAFT_API_DO_NOT_USE
+# define u_fgetcx_3_8 u_fgetcx_DRAFT_API_DO_NOT_USE
+# define u_fgetfile_3_8 u_fgetfile_DRAFT_API_DO_NOT_USE
+# define u_fgetlocale_3_8 u_fgetlocale_DRAFT_API_DO_NOT_USE
+# define u_fgets_3_8 u_fgets_DRAFT_API_DO_NOT_USE
+# define u_file_read_3_8 u_file_read_DRAFT_API_DO_NOT_USE
+# define u_file_write_3_8 u_file_write_DRAFT_API_DO_NOT_USE
+# define u_finit_3_8 u_finit_DRAFT_API_DO_NOT_USE
+# define u_fopen_3_8 u_fopen_DRAFT_API_DO_NOT_USE
+# define u_fprintf_3_8 u_fprintf_DRAFT_API_DO_NOT_USE
+# define u_fprintf_u_3_8 u_fprintf_u_DRAFT_API_DO_NOT_USE
+# define u_fputc_3_8 u_fputc_DRAFT_API_DO_NOT_USE
+# define u_fputs_3_8 u_fputs_DRAFT_API_DO_NOT_USE
+# define u_frewind_3_8 u_frewind_DRAFT_API_DO_NOT_USE
+# define u_fscanf_3_8 u_fscanf_DRAFT_API_DO_NOT_USE
+# define u_fscanf_u_3_8 u_fscanf_u_DRAFT_API_DO_NOT_USE
+# define u_fsetcodepage_3_8 u_fsetcodepage_DRAFT_API_DO_NOT_USE
+# define u_fsetlocale_3_8 u_fsetlocale_DRAFT_API_DO_NOT_USE
+# define u_fsettransliterator_3_8 u_fsettransliterator_DRAFT_API_DO_NOT_USE
+# define u_fstropen_3_8 u_fstropen_DRAFT_API_DO_NOT_USE
+# define u_fungetc_3_8 u_fungetc_DRAFT_API_DO_NOT_USE
+# define u_snprintf_3_8 u_snprintf_DRAFT_API_DO_NOT_USE
+# define u_snprintf_u_3_8 u_snprintf_u_DRAFT_API_DO_NOT_USE
+# define u_sprintf_3_8 u_sprintf_DRAFT_API_DO_NOT_USE
+# define u_sprintf_u_3_8 u_sprintf_u_DRAFT_API_DO_NOT_USE
+# define u_sscanf_3_8 u_sscanf_DRAFT_API_DO_NOT_USE
+# define u_sscanf_u_3_8 u_sscanf_u_DRAFT_API_DO_NOT_USE
+# define u_vfprintf_3_8 u_vfprintf_DRAFT_API_DO_NOT_USE
+# define u_vfprintf_u_3_8 u_vfprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vfscanf_3_8 u_vfscanf_DRAFT_API_DO_NOT_USE
+# define u_vfscanf_u_3_8 u_vfscanf_u_DRAFT_API_DO_NOT_USE
+# define u_vsnprintf_3_8 u_vsnprintf_DRAFT_API_DO_NOT_USE
+# define u_vsnprintf_u_3_8 u_vsnprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vsprintf_3_8 u_vsprintf_DRAFT_API_DO_NOT_USE
+# define u_vsprintf_u_3_8 u_vsprintf_u_DRAFT_API_DO_NOT_USE
+# define u_vsscanf_3_8 u_vsscanf_DRAFT_API_DO_NOT_USE
+# define u_vsscanf_u_3_8 u_vsscanf_u_DRAFT_API_DO_NOT_USE
+# define ucal_getTZDataVersion_3_8 ucal_getTZDataVersion_DRAFT_API_DO_NOT_USE
+# define ucasemap_getBreakIterator_3_8 ucasemap_getBreakIterator_DRAFT_API_DO_NOT_USE
+# define ucasemap_setBreakIterator_3_8 ucasemap_setBreakIterator_DRAFT_API_DO_NOT_USE
+# define ucasemap_toTitle_3_8 ucasemap_toTitle_DRAFT_API_DO_NOT_USE
+# define ucasemap_utf8FoldCase_3_8 ucasemap_utf8FoldCase_DRAFT_API_DO_NOT_USE
+# define ucasemap_utf8ToTitle_3_8 ucasemap_utf8ToTitle_DRAFT_API_DO_NOT_USE
+# define udatpg_addPattern_3_8 udatpg_addPattern_DRAFT_API_DO_NOT_USE
+# define udatpg_clone_3_8 udatpg_clone_DRAFT_API_DO_NOT_USE
+# define udatpg_close_3_8 udatpg_close_DRAFT_API_DO_NOT_USE
+# define udatpg_getAppendItemFormat_3_8 udatpg_getAppendItemFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_getAppendItemName_3_8 udatpg_getAppendItemName_DRAFT_API_DO_NOT_USE
+# define udatpg_getBaseSkeleton_3_8 udatpg_getBaseSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_getBestPattern_3_8 udatpg_getBestPattern_DRAFT_API_DO_NOT_USE
+# define udatpg_getDateTimeFormat_3_8 udatpg_getDateTimeFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_getDecimal_3_8 udatpg_getDecimal_DRAFT_API_DO_NOT_USE
+# define udatpg_getPatternForSkeleton_3_8 udatpg_getPatternForSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_getSkeleton_3_8 udatpg_getSkeleton_DRAFT_API_DO_NOT_USE
+# define udatpg_openBaseSkeletons_3_8 udatpg_openBaseSkeletons_DRAFT_API_DO_NOT_USE
+# define udatpg_openEmpty_3_8 udatpg_openEmpty_DRAFT_API_DO_NOT_USE
+# define udatpg_openSkeletons_3_8 udatpg_openSkeletons_DRAFT_API_DO_NOT_USE
+# define udatpg_open_3_8 udatpg_open_DRAFT_API_DO_NOT_USE
+# define udatpg_replaceFieldTypes_3_8 udatpg_replaceFieldTypes_DRAFT_API_DO_NOT_USE
+# define udatpg_setAppendItemFormat_3_8 udatpg_setAppendItemFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_setAppendItemName_3_8 udatpg_setAppendItemName_DRAFT_API_DO_NOT_USE
+# define udatpg_setDateTimeFormat_3_8 udatpg_setDateTimeFormat_DRAFT_API_DO_NOT_USE
+# define udatpg_setDecimal_3_8 udatpg_setDecimal_DRAFT_API_DO_NOT_USE
+# define uloc_getLocaleForLCID_3_8 uloc_getLocaleForLCID_DRAFT_API_DO_NOT_USE
+# define uset_cloneAsThawed_3_8 uset_cloneAsThawed_DRAFT_API_DO_NOT_USE
+# define uset_clone_3_8 uset_clone_DRAFT_API_DO_NOT_USE
+# define uset_freeze_3_8 uset_freeze_DRAFT_API_DO_NOT_USE
+# define uset_isFrozen_3_8 uset_isFrozen_DRAFT_API_DO_NOT_USE
+# define uset_spanBackUTF8_3_8 uset_spanBackUTF8_DRAFT_API_DO_NOT_USE
+# define uset_spanBack_3_8 uset_spanBack_DRAFT_API_DO_NOT_USE
+# define uset_spanUTF8_3_8 uset_spanUTF8_DRAFT_API_DO_NOT_USE
+# define uset_span_3_8 uset_span_DRAFT_API_DO_NOT_USE
+# endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_DRAFT_API */
+#endif /* UDRAFT_H */
+
diff --git a/jni/EastAsianWidth/unicode/uenum.h b/jni/EastAsianWidth/unicode/uenum.h
new file mode 100644
index 0000000..ff9b299
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uenum.h
@@ -0,0 +1,134 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: String Enumeration
+ */
+
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator. If en is NULL,
+ * does nothing. After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+/**
+ * Returns the number of elements that the iterator traverses. If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ * iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0. If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service. Set to
+ * U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ * UChar* and conversion to char* with the invariant converter
+ * fails. This error pertains only to the current string, so iteration
+ * might be able to continue successfully.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs. This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uidna.h b/jni/EastAsianWidth/unicode/uidna.h
new file mode 100644
index 0000000..52aa6e9
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uidna.h
@@ -0,0 +1,312 @@
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2007, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: uidna.h
+ * encoding: US-ASCII
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __UIDNA_H__
+#define __UIDNA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: Internationalized Domain Names in Applications Transformation
+ *
+ * UIDNA API implements the IDNA protocol as defined in the IDNA RFC
+ * (http://www.ietf.org/rfc/rfc3490.txt).
+ * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels
+ * containing non-ASCII code points are required to be processed by
+ * ToASCII operation before passing it to resolver libraries. Domain names
+ * that are obtained from resolver libraries are required to be processed by
+ * ToUnicode operation before displaying the domain name to the user.
+ * IDNA requires that implementations process input strings with Nameprep
+ * (http://www.ietf.org/rfc/rfc3491.txt),
+ * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
+ * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode;
+ * neither Nameprep nor Punycode are optional.
+ * The input and output of ToASCII and ToUnicode operations are Unicode
+ * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
+ * multiple times to an input string will yield the same result as applying the operation
+ * once.
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
+ * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
+ *
+ */
+
+/**
+ * Default options: prohibit processing of unassigned code points in the input and
+ * do not check whether the input conforms to STD-3 ASCII rules.
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_DEFAULT 0x0000
+/**
+ * Option to allow processing of unassigned codepoints in the input
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_ALLOW_UNASSIGNED 0x0001
+/**
+ * Option to check if input conforms to STD-3 ASCII rules
+ *
+ * @see uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_USE_STD3_RULES 0x0002
+
+/**
+ * This function implements the ToASCII operation as defined in the IDNA RFC.
+ * This operation is done on single labels before sending it to something that expects
+ * ASCII names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
+ * "www","example", and "com".
+ *
+ *
+ * @param src Input UChar array containing label in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * This function implements the ToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on single labels before sending it to something that expects
+ * Unicode names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
+ * "www","example", and "com".
+ *
+ * @param src Input UChar array containing ASCII (ACE encoded) label.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output Converted UChar array containing Unicode equivalent of label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points. Note: This option is
+ * required on toUnicode operation because the RFC mandates
+ * verification of decoded ACE input by applying toASCII and comparing
+ * its output with source
+ *
+ *
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
+ * domain name cannot be used as an Internationalized Domain Name and the application
+ * should have methods defined to deal with the failure.
+ *
+ * Note: IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToASCII( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ *
+ * Note: IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array containing Unicode equivalent of source IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * Compare two IDN strings for equivalence.
+ * This function splits the domain names into labels and compares them.
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
+ * applying toASCII) match using a case-insensitive ASCII comparison.
+ * Two domain names are considered a match if and only if all labels
+ * match regardless of whether label separators match.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_IDNA_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_compare( const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ int32_t options,
+ UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uintrnal.h b/jni/EastAsianWidth/unicode/uintrnal.h
new file mode 100644
index 0000000..dedae51
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uintrnal.h
@@ -0,0 +1,180 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uintrnal.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+* Contains data for commenting out APIs.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UINTRNAL_H
+#define UINTRNAL_H
+
+#ifdef U_HIDE_INTERNAL_API
+
+# if U_DISABLE_RENAMING
+# define RegexPatternDump RegexPatternDump_INTERNAL_API_DO_NOT_USE
+# define pl_addFontRun pl_addFontRun_INTERNAL_API_DO_NOT_USE
+# define pl_addLocaleRun pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
+# define pl_addValueRun pl_addValueRun_INTERNAL_API_DO_NOT_USE
+# define pl_close pl_close_INTERNAL_API_DO_NOT_USE
+# define pl_closeFontRuns pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_closeLine pl_closeLine_INTERNAL_API_DO_NOT_USE
+# define pl_closeLocaleRuns pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_closeValueRuns pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_countLineRuns pl_countLineRuns_INTERNAL_API_DO_NOT_USE
+# define pl_create pl_create_INTERNAL_API_DO_NOT_USE
+# define pl_getAscent pl_getAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getDescent pl_getDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunCount pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunFont pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunLastLimit pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunLimit pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLeading pl_getLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getLineAscent pl_getLineAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getLineDescent pl_getLineDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getLineLeading pl_getLineLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getLineVisualRun pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
+# define pl_getLineWidth pl_getLineWidth_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunCount pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLimit pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLocale pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
+# define pl_getParagraphLevel pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
+# define pl_getTextDirection pl_getTextDirection_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunCount pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunLastLimit pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunLimit pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunValue pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunAscent pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunDescent pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunDirection pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunFont pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunLeading pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunPositions pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
+# define pl_isComplex pl_isComplex_INTERNAL_API_DO_NOT_USE
+# define pl_line pl_line_INTERNAL_API_DO_NOT_USE
+# define pl_nextLine pl_nextLine_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyFontRuns pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyValueRuns pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openFontRuns pl_openFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openLocaleRuns pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openValueRuns pl_openValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_paragraph pl_paragraph_INTERNAL_API_DO_NOT_USE
+# define pl_reflow pl_reflow_INTERNAL_API_DO_NOT_USE
+# define pl_resetFontRuns pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_resetLocaleRuns pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_resetValueRuns pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_visualRun pl_visualRun_INTERNAL_API_DO_NOT_USE
+# define ucol_collatorToIdentifier ucol_collatorToIdentifier_INTERNAL_API_DO_NOT_USE
+# define ucol_equals ucol_equals_INTERNAL_API_DO_NOT_USE
+# define ucol_forgetUCA ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
+# define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
+# define ucol_getUnsafeSet ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
+# define ucol_identifierToShortString ucol_identifierToShortString_INTERNAL_API_DO_NOT_USE
+# define ucol_openFromIdentifier ucol_openFromIdentifier_INTERNAL_API_DO_NOT_USE
+# define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
+# define ucol_shortStringToIdentifier ucol_shortStringToIdentifier_INTERNAL_API_DO_NOT_USE
+# define uprv_getDefaultCodepage uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
+# define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
+# define ures_openFillIn ures_openFillIn_INTERNAL_API_DO_NOT_USE
+# define utf8_appendCharSafeBody utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_back1SafeBody utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_countTrailBytes utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
+# define utf8_nextCharSafeBody utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_prevCharSafeBody utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
+# else
+# define RegexPatternDump_3_8 RegexPatternDump_INTERNAL_API_DO_NOT_USE
+# define pl_addFontRun_3_8 pl_addFontRun_INTERNAL_API_DO_NOT_USE
+# define pl_addLocaleRun_3_8 pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
+# define pl_addValueRun_3_8 pl_addValueRun_INTERNAL_API_DO_NOT_USE
+# define pl_closeFontRuns_3_8 pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_closeLine_3_8 pl_closeLine_INTERNAL_API_DO_NOT_USE
+# define pl_closeLocaleRuns_3_8 pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_closeValueRuns_3_8 pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_close_3_8 pl_close_INTERNAL_API_DO_NOT_USE
+# define pl_countLineRuns_3_8 pl_countLineRuns_INTERNAL_API_DO_NOT_USE
+# define pl_create_3_8 pl_create_INTERNAL_API_DO_NOT_USE
+# define pl_getAscent_3_8 pl_getAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getDescent_3_8 pl_getDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunCount_3_8 pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunFont_3_8 pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunLastLimit_3_8 pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getFontRunLimit_3_8 pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLeading_3_8 pl_getLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getLineAscent_3_8 pl_getLineAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getLineDescent_3_8 pl_getLineDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getLineLeading_3_8 pl_getLineLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getLineVisualRun_3_8 pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
+# define pl_getLineWidth_3_8 pl_getLineWidth_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunCount_3_8 pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLastLimit_3_8 pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLimit_3_8 pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getLocaleRunLocale_3_8 pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
+# define pl_getParagraphLevel_3_8 pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
+# define pl_getTextDirection_3_8 pl_getTextDirection_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunCount_3_8 pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunLastLimit_3_8 pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunLimit_3_8 pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
+# define pl_getValueRunValue_3_8 pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunAscent_3_8 pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunDescent_3_8 pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunDirection_3_8 pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunFont_3_8 pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphCount_3_8 pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphToCharMap_3_8 pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunGlyphs_3_8 pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunLeading_3_8 pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
+# define pl_getVisualRunPositions_3_8 pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
+# define pl_isComplex_3_8 pl_isComplex_INTERNAL_API_DO_NOT_USE
+# define pl_line_3_8 pl_line_INTERNAL_API_DO_NOT_USE
+# define pl_nextLine_3_8 pl_nextLine_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyFontRuns_3_8 pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyLocaleRuns_3_8 pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openEmptyValueRuns_3_8 pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openFontRuns_3_8 pl_openFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openLocaleRuns_3_8 pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_openValueRuns_3_8 pl_openValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_paragraph_3_8 pl_paragraph_INTERNAL_API_DO_NOT_USE
+# define pl_reflow_3_8 pl_reflow_INTERNAL_API_DO_NOT_USE
+# define pl_resetFontRuns_3_8 pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
+# define pl_resetLocaleRuns_3_8 pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
+# define pl_resetValueRuns_3_8 pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
+# define pl_visualRun_3_8 pl_visualRun_INTERNAL_API_DO_NOT_USE
+# define ucol_collatorToIdentifier_3_8 ucol_collatorToIdentifier_INTERNAL_API_DO_NOT_USE
+# define ucol_equals_3_8 ucol_equals_INTERNAL_API_DO_NOT_USE
+# define ucol_forgetUCA_3_8 ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
+# define ucol_getAttributeOrDefault_3_8 ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
+# define ucol_getUnsafeSet_3_8 ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
+# define ucol_identifierToShortString_3_8 ucol_identifierToShortString_INTERNAL_API_DO_NOT_USE
+# define ucol_openFromIdentifier_3_8 ucol_openFromIdentifier_INTERNAL_API_DO_NOT_USE
+# define ucol_prepareShortStringOpen_3_8 ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
+# define ucol_shortStringToIdentifier_3_8 ucol_shortStringToIdentifier_INTERNAL_API_DO_NOT_USE
+# define uprv_getDefaultCodepage_3_8 uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
+# define uprv_getDefaultLocaleID_3_8 uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
+# define ures_openFillIn_3_8 ures_openFillIn_INTERNAL_API_DO_NOT_USE
+# define utf8_appendCharSafeBody_3_8 utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_back1SafeBody_3_8 utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_countTrailBytes_3_8 utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
+# define utf8_nextCharSafeBody_3_8 utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
+# define utf8_prevCharSafeBody_3_8 utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
+# endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif /* UINTRNAL_H */
+
diff --git a/jni/EastAsianWidth/unicode/uiter.h b/jni/EastAsianWidth/unicode/uiter.h
new file mode 100644
index 0000000..9409f01
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uiter.h
@@ -0,0 +1,707 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#ifdef XP_CPLUSPLUS
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#ifdef XP_CPLUSPLUS
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uloc.h b/jni/EastAsianWidth/unicode/uloc.h
new file mode 100644
index 0000000..0d5d3b3
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uloc.h
@@ -0,0 +1,931 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/22/98 stephen JDK 1.2 sync.
+* 12/08/98 rtg New C API for Locale
+* 03/30/99 damiba overhaul
+* 03/31/99 helena Javadoc for uloc functions.
+* 04/15/99 Madhu Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Locale
+ *
+ * ULoc C API for Locale
+ * A Locale represents a specific geographical, political,
+ * or cultural region. An operation that requires a Locale to perform
+ * its task is called locale-sensitive and uses the Locale
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture. In the C APIs, a locale is simply a const char string.
+ *
+ * Locale with one of the three options listed below.
+ * Each of the components is separated by '_' in the locale string.
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ * The first option is a valid ISO
+ * Language Code. These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ *
+ * \code
+ * newLanguage
+ *
+ * newLanguage + newCountry
+ *
+ * newLanguage + newCountry + newVariant
+ * \endcode
+ *
+ * \htmlonly
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt
+ *
+ *
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html
+ *
+ * Locale is just an identifier for a region,
+ * no validity check is performed when you specify a Locale.
+ * If you want to see whether particular resources are available for the
+ * Locale you asked for, you must query those resources. For
+ * example, ask the UNumberFormat for the locales it supports
+ * using its getAvailable method.
+ *
Note: When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * UResourceBundle.
+ *
+ * Locale provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ *
+ *
+ * \code
+ * ULOC_US
+ * \endcode
+ *
+ * \htmlonlyuloc_getCountry to get the ISO Country Code and
+ * uloc_getLanguage to get the ISO Language Code. You can
+ * use uloc_getDisplayCountry to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use uloc_getDisplayLanguage to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the uloc_getDisplayXXX methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * unum_xxx functions format
+ * numbers, currency, or percentages in a locale-sensitive manner.
+ * \endhtmlonly
+ *
\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly
+ * \code
+ * UErrorCode success = U_ZERO_ERROR;
+ * UNumberFormat *nf;
+ * const char* myLocale = "fr_FR";
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, success );
+ * unum_close(nf);
+ * \endcode
+ *
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ * A
+ * \code
+ *
+ * nf = unum_open( UNUM_DEFAULT, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, myLocale, success );
+ * unum_close(nf);
+ * \endcode
+ *
+ * \htmlonlyLocale is the mechanism for identifying the kind of services
+ * (UNumberFormat) that you would like to get. The locale is
+ * just a mechanism for identifying these services.
+ *
+ * \endhtmlonly
+ *
\endhtmlonly
+ *
+ * \code
+ * const char* uloc_getAvailable(int32_t index);
+ * int32_t uloc_countAvailable();
+ * int32_t
+ * uloc_getDisplayName(const char* localeID,
+ * const char* inLocaleID,
+ * UChar* result,
+ * int32_t maxResultSize,
+ * UErrorCode* err);
+ *
+ * \endcode
+ *
+ * \htmlonly
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ *
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword.
+ * @see #uloc_openKeywords
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale whose keyword value is to be displayed. NULL may be used to specify the default.
+ * @param keyword The keyword whose value should be used.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword value.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocaleID would result in "Anglais", while passing Locale::getGerman()
+ * for inLocaleID would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name. If it's greater
+ * than maxResultSize, the returned displayable name will be truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+ const char* inLocaleID,
+ UChar* result,
+ int32_t maxResultSize,
+ UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of all available locales.
+ * The return value is a pointer to an item of
+ * a locale name array. Both this array and the pointers
+ * it contains are owned by ICU and should not be deleted or written through
+ * by the caller. The locale name is a NUL-terminated string.
+ * @param n the specific locale name index of the available locale list
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the list of all available locales.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ *
+ * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getParent(const char* localeID,
+ char* parent,
+ int32_t parentCapacity,
+ UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale whose keywords are to be enumerated
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @stable ICU 2.8
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length of keyword value
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Set the value of the specified keyword.
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text. If a
+ * BUFFER_OVERFLOW_ERROR is received, it means that the original
+ * buffer is untouched. This is done to prevent incorrect or possibly
+ * even malformed locales from being generated and used.
+ *
+ * @param keywordName name of the keyword to be set. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param buffer input buffer containing locale to be modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+/**
+ * enums for the 'outResult' parameter return value
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @stable ICU 3.2
+ */
+typedef enum {
+ ULOC_ACCEPT_FAILED = 0, /* No exact match was found. */
+ ULOC_ACCEPT_VALID = 1, /* An exact match was found. */
+ ULOC_ACCEPT_FALLBACK = 2 /* A fallback was found, for example,
+ Accept list contained 'ja_JP'
+ which matched available locale 'ja'. */
+} UAcceptResult;
+
+
+/**
+ * Based on a HTTP header from a web browser and a list of available locales,
+ * determine an acceptable locale for the user.
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult,
+ const char *httpAcceptLanguage,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/**
+ * Based on a list of available locales,
+ * determine an acceptable locale for the user.
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param acceptList - list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult, const char **acceptList,
+ int32_t acceptListCount,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+
+/**
+ * Gets the ICU locale ID for the specified Win32 LCID value.
+ *
+ * @param hostID the Win32 LCID to translate
+ * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
+ * if there is room.
+ * @param localeCapacity the size of the output buffer
+ * @param status an error is returned if the LCID is unrecognized or the output buffer
+ * is too small
+ * @return the actual size of the locale ID, not including NUL-termination
+ * @draft ICU 3.8
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
+ UErrorCode *status);
+
+#endif /*_ULOC*/
+
+
diff --git a/jni/EastAsianWidth/unicode/umachine.h b/jni/EastAsianWidth/unicode/umachine.h
new file mode 100644
index 0000000..083f9cf
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/umachine.h
@@ -0,0 +1,338 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: umachine.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file defines basic types and constants for utf.h to be
+* platform-independent. umachine.h and utf.h are included into
+* utypes.h to provide all the general definitions for ICU.
+* All of these definitions used to be in utypes.h before
+* the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * Basic types and constants for UTF
+ * This file defines basic types and constants for utf.h to be
+ * platform-independent. umachine.h and utf.h are included into
+ * utypes.h to provide all the general definitions for ICU.
+ * All of these definitions used to be in utypes.h before
+ * the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions */
+/* which are contained in the platform-specific file platform.h */
+/*==========================================================================*/
+
+#if defined(U_PALMOS)
+# include "unicode/ppalmos.h"
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# include "unicode/pwin32.h"
+#else
+# include "unicode/platform.h"
+#endif
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include UnicodeFilter defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * UnicodeFunctor is an abstract base class for objects
+ * that perform match and/or replace operations on Unicode strings.
+ * @author Alan Liu
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeFunctor : public UObject {
+
+public:
+
+ /**
+ * Destructor
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeFunctor();
+
+ /**
+ * Return a copy of this object. All UnicodeFunctor objects
+ * have to support cloning in order to allow classes using
+ * UnicodeFunctor to implement cloning.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeFunctor* clone() const = 0;
+
+ /**
+ * Cast 'this' to a UnicodeMatcher* pointer and return the
+ * pointer, or null if this is not a UnicodeMatcher*. Subclasses
+ * that mix in UnicodeMatcher as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeMatcher, since
+ * UnicodeMatcher is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const;
+
+ /**
+ * Cast 'this' to a UnicodeReplacer* pointer and return the
+ * pointer, or null if this is not a UnicodeReplacer*. Subclasses
+ * that mix in UnicodeReplacer as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeReplacer, since
+ * UnicodeReplacer is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeReplacer* toReplacer() const;
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID().
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID polymorphically. This method
+ * is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * UnicodeMatcher::matches()
+ * indicating the degree of match.
+ * @stable ICU 2.4
+ */
+enum UMatchDegree {
+ /**
+ * Constant returned by matches() indicating a
+ * mismatch between the text and this matcher. The text contains
+ * a character which does not match, or the text does not contain
+ * all desired characters for a non-incremental match.
+ * @stable ICU 2.4
+ */
+ U_MISMATCH,
+
+ /**
+ * Constant returned by matches() indicating a
+ * partial match between the text and this matcher. This value is
+ * only returned for incremental match operations. All characters
+ * of the text match, but more characters are required for a
+ * complete match. Alternatively, for variable-length matchers,
+ * all characters of the text match, and if more characters were
+ * supplied at limit, they might also match.
+ * @stable ICU 2.4
+ */
+ U_PARTIAL_MATCH,
+
+ /**
+ * Constant returned by matches() indicating a
+ * complete match between the text and this matcher. For an
+ * incremental variable-length match, this value is returned if
+ * the given text matches, and it is known that additional
+ * characters would not alter the extent of the match.
+ * @stable ICU 2.4
+ */
+ U_MATCH
+};
+
+/**
+ * UnicodeMatcher defines a protocol for objects that can
+ * match a range of characters in a Replaceable string.
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeMatcher();
+
+ /**
+ * Return a UMatchDegree value indicating the degree of match for
+ * the given text at the given offset. Zero, one, or more
+ * characters may be matched.
+ *
+ * Matching in the forward direction is indicated by limit >
+ * offset. Characters from offset forwards to limit-1 will be
+ * considered for matching.
+ *
+ * Matching in the reverse direction is indicated by limit <
+ * offset. Characters from offset backwards to limit+1 will be
+ * considered for matching.
+ *
+ * If limit == offset then the only match possible is a zero
+ * character match (which subclasses may implement if desired).
+ *
+ * As a side effect, advance the offset parameter to the limit of
+ * the matched substring. In the forward direction, this will be
+ * the index of the last matched character plus one. In the
+ * reverse direction, this will be the index of the last matched
+ * character minus one.
+ *
+ * UnicodeSet supports two APIs. The first is the
+ * operand API that allows the caller to modify the value of
+ * a UnicodeSet object. It conforms to Java 2's
+ * java.util.Set interface, although
+ * UnicodeSet does not actually implement that
+ * interface. All methods of Set are supported, with the
+ * modification that they take a character range or single character
+ * instead of an Object, and they take a
+ * UnicodeSet instead of a Collection. The
+ * operand API may be thought of in terms of boolean logic: a boolean
+ * OR is implemented by add, a boolean AND is implemented
+ * by retain, a boolean XOR is implemented by
+ * complement taking an argument, and a boolean NOT is
+ * implemented by complement with no argument. In terms
+ * of traditional set theory function names, add is a
+ * union, retain is an intersection, remove
+ * is an asymmetric difference, and complement with no
+ * argument is a set complement with respect to the superset range
+ * MIN_VALUE-MAX_VALUE
+ *
+ * applyPattern()/toPattern() API from the
+ * java.text.Format-derived classes. Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method applyPattern() sets all
+ * attributes of a UnicodeSet at once, based on a
+ * string pattern.
+ *
+ * applyPattern() methods and returned by the
+ * toPattern() method. These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes. Here are some simple examples:
+ *
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ *
+ * Any character may be preceded by a backslash in order to remove any special
+ * meaning. White space characters, as defined by UCharacter.isWhitespace(), are
+ * ignored, unless they are escaped.
+ *
+ *
+ *
+ * \htmlonly
+ *
+ * []No characters
+ *
+ *
+ * [a]The character 'a'
+ *
+ *
+ *
+ * [ae]The characters 'a' and 'e'
+ *
+ *
+ *
+ * [a-e]The characters 'a' through 'e' inclusive, in Unicode code
+ * point order
+ *
+ *
+ *
+ * [\\u4E01]The character U+4E01
+ *
+ *
+ *
+ * [a{ab}{ac}]The character 'a' and the multicharacter strings "ab" and
+ * "ac"
+ *
+ *
+ *
+ * [\\p{Lu}]All characters in the general category Uppercase Letter
+ *
+ *
+ *
+ * [a]The set containing 'a'
+ * [a-z]The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * [^a-z]The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
+ * [[pat1][pat2]]
+ * The union of sets specified by pat1 and pat2
+ * [[pat1]&[pat2]]
+ * The intersection of sets specified by pat1 and pat2
+ * [[pat1]-[pat2]]
+ * The asymmetric difference of sets specified by pat1 and
+ * pat2
+ * [:Lu:] or \\p{Lu}
+ * The set of characters having the specified
+ * Unicode property; in
+ * this case, Unicode uppercase letters
+ * [:^Lu:] or \\P{Lu}
+ * The set of characters not having the given
+ * Unicode property
+ * \endhtmlonly
+ *
\endhtmlonly
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeSet : public UnicodeFilter {
+
+ int32_t len; // length of list used; 0 <= len <= capacity
+ int32_t capacity; // capacity of list
+ UChar32* list; // MUST be terminated with HIGH
+ BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
+ UChar32* buffer; // internal buffer, may be NULL
+ int32_t bufferCapacity; // capacity of buffer
+ int32_t patLen;
+
+ /**
+ * The pattern representation of this set. This may not be the
+ * most economical pattern. It is the pattern supplied to
+ * applyPattern(), with variables substituted and whitespace
+ * removed. For sets constructed without applyPattern(), or
+ * modified using the non-pattern API, this string will be empty,
+ * indicating that toPattern() must generate a pattern
+ * representation from the inversion list.
+ */
+ UChar *pat;
+ UVector* strings; // maintained in sorted order
+ UnicodeSetStringSpan *stringSpan;
+
+public:
+
+ enum {
+ /**
+ * Minimum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MIN_VALUE = 0,
+
+ /**
+ * Maximum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MAX_VALUE = 0x10ffff
+ };
+
+ //----------------------------------------------------------------
+ // Constructors &c
+ //----------------------------------------------------------------
+
+public:
+
+ /**
+ * Constructs an empty set.
+ * @stable ICU 2.0
+ */
+ UnicodeSet();
+
+ /**
+ * Constructs a set containing the given range. If
+ *
+ *
+ *
+ *
+ * pattern :=
+ * ('[' '^'? item* ']') |
+ * property
+ *
+ *
+ * item :=
+ * char | (char '-' char) | pattern-expr
+ *
+ *
+ *
+ * pattern-expr :=
+ * pattern | pattern-expr pattern |
+ * pattern-expr op pattern
+ *
+ *
+ *
+ * op :=
+ * '&' | '-'
+ *
+ *
+ *
+ * special :=
+ * '[' | ']' | '-'
+ *
+ *
+ *
+ * char := any character that is not
+ * special
+ * | ('\' any character)
+ * | ('\\u' hex hex hex hex)
+ *
+ *
+ *
+ * hex := any character for which
+ *
+ * Character.digit(c, 16)
+ * returns a non-negative result
+ *
+ *
+ * property := a Unicode property set pattern
+ *
+ *
+ *
+ * \htmlonly
+ *
+ * Legend:
+ *
+ *
+ *
+ *
+ *
+ * a := b
+ *
+ * a may be replaced by b
+ *
+ *
+ * a?
+ * zero or one instance of
+ * a
+ *
+ *
+ *
+ * a*
+ * zero or more instances of
+ * a
+ *
+ *
+ *
+ * a | b
+ * either
+ * a or b
+ *
+ *
+ *
+ * 'a'
+ * the literal string between the quotes
+ * end >
+ * start then an empty set is created.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 2.4
+ */
+ UnicodeSet(UChar32 start, UChar32 end);
+
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern
+ * contains a syntax error.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
+ * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern
+ * contains a syntax error.
+ * @internal
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
+ * @param status input-output error code
+ * @stable ICU 2.8
+ */
+ UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set that is identical to the given UnicodeSet.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeSet& o);
+
+ /**
+ * Destructs the set.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeSet();
+
+ /**
+ * Assigns this object to be a copy of another.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& operator=(const UnicodeSet& o);
+
+ /**
+ * Compares the specified object with this set for equality. Returns
+ * true if the two sets
+ * have the same size, and every member of the specified set is
+ * contained in this set (or equivalently, every member of this set is
+ * contained in the specified set).
+ *
+ * @param o set to be compared for equality with this set.
+ * @return true if the specified set is equal to this set.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const UnicodeSet& o) const;
+
+ /**
+ * Compares the specified object with this set for inequality. Returns
+ * true if the specified set is not equal to this set.
+ * @stable ICU 2.0
+ */
+ UBool operator!=(const UnicodeSet& o) const;
+
+ /**
+ * Returns a copy of this object. All UnicodeFunctor objects have
+ * to support cloning in order to allow classes using
+ * UnicodeFunctors, such as Transliterator, to implement cloning.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use cloneAsThawed() for a mutable clone of a frozen set.
+ * @see cloneAsThawed
+ * @stable ICU 2.0
+ */
+ virtual UnicodeFunctor* clone() const;
+
+ /**
+ * Returns the hash code value for this set.
+ *
+ * @return the hash code value for this set.
+ * @see Object#hashCode()
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ //----------------------------------------------------------------
+ // Freezable API
+ //----------------------------------------------------------------
+
+ /**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @return TRUE/FALSE for whether the set has been frozen
+ * @see freeze
+ * @see cloneAsThawed
+ * @draft ICU 3.8
+ */
+ inline UBool isFrozen() const;
+
+ /**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * contains() and span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @return this set.
+ * @see isFrozen
+ * @see cloneAsThawed
+ * @draft ICU 3.8
+ */
+ UnicodeFunctor *freeze();
+
+ /**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @return the mutable clone
+ * @see freeze
+ * @see isFrozen
+ * @draft ICU 3.8
+ */
+ UnicodeFunctor *cloneAsThawed() const;
+
+ //----------------------------------------------------------------
+ // Public API
+ //----------------------------------------------------------------
+
+ /**
+ * Make this object represent the range start - end.
+ * If start > end then this object is set to an
+ * empty range.
+ * A frozen set will not be modified.
+ *
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 2.4
+ */
+ UnicodeSet& set(UChar32 start, UChar32 end);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ * @stable ICU 2.4
+ */
+ static UBool resemblesPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, optionally ignoring white space. See the class
+ * description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern
+ * contains a syntax error.
+ * Empties the set passed before applying the pattern.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ UErrorCode& status);
+
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, optionally ignoring white space. See the class
+ * description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
+ * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern
+ * contains a syntax error.
+ * Empties the set passed before applying the pattern.
+ * @return a reference to this
+ * @internal
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Parses the given pattern, starting at the given position. The
+ * character at pattern.charAt(pos.getIndex()) must be '[', or the
+ * parse fails. Parsing continues until the corresponding closing
+ * ']'. If a syntax error is encountered between the opening and
+ * closing brace, the parse fails. Upon return from a successful
+ * parse, the ParsePosition is updated to point to the character
+ * following the closing ']', and a reference to this set is
+ * returned. This method calls
+ * itself recursively to parse embedded subpatterns.
+ * Empties the set passed before applying the pattern.
+ * A frozen set will not be modified.
+ *
+ * @param pattern the string containing the pattern to be parsed.
+ * The portion of the string from pos.getIndex(), which must be a
+ * '[', to the corresponding closing ']', is parsed.
+ * @param pos upon entry, the position at which to begin parsing.
+ * The character at pattern.charAt(pos.getIndex()) must be a '['.
+ * Upon return from a successful parse, pos.getIndex() is either
+ * the character after the closing ']' of the parsed pattern, or
+ * pattern.length() if the closing ']' is the last character of
+ * the pattern string.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
+ * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern
+ * contains a syntax error.
+ * @return a reference to this
+ * @stable ICU 2.8
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a UnicodeSet constructor, it
+ * will produce another set that is equal to this one.
+ * A frozen set will not be modified.
+ * @param result the string to receive the rules. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * characters to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = FALSE) const;
+
+ /**
+ * Modifies this set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyIntPropertyValue(UProperty prop,
+ int32_t value,
+ UErrorCode& ec);
+
+ /**
+ * Modifies this set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property alias, either short or long. The name is matched
+ * loosely. See PropertyAliases.txt for names and a description of loose
+ * matching. If the value string is empty, then this string is interpreted
+ * as either a General_Category value alias, a Script value alias, a binary
+ * property alias, or a special ID. Special IDs are matched loosely and
+ * correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param value a value alias, either short or long. The name is matched
+ * loosely. See PropertyValueAliases.txt for names and a description of
+ * loose matching. In addition to aliases listed, numeric values and
+ * canonical combining classes may be expressed numerically, e.g., ("nv",
+ * "0.5") or ("ccc", "220"). The value string may also be empty.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
+ const UnicodeString& value,
+ UErrorCode& ec);
+
+ /**
+ * Returns the number of elements in this set (its cardinality).
+ * Note that the elements of a set may include both individual
+ * codepoints and strings.
+ *
+ * @return the number of elements in this set (its cardinality).
+ * @stable ICU 2.0
+ */
+ virtual int32_t size(void) const;
+
+ /**
+ * Returns true if this set contains no elements.
+ *
+ * @return true if this set contains no elements.
+ * @stable ICU 2.0
+ */
+ virtual UBool isEmpty(void) const;
+
+ /**
+ * Returns true if this set contains the given character.
+ * This function works faster with a frozen set.
+ * @param c character to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const;
+
+ /**
+ * Returns true if this set contains every character
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains the given
+ * multicharacter string.
+ * @param s string to be checked for containment
+ * @return true if this set contains the specified string
+ * @stable ICU 2.4
+ */
+ UBool contains(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains all the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ virtual UBool containsAll(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains all the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsAll(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains none of the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * in the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * and strings of the given set.
+ * @param s The set to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeSet& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeString& s) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Implement UnicodeMatcher::matches()
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental);
+
+private:
+ /**
+ * Returns the longest match for s in text at the given position.
+ * If limit > start then match forward from start+1 to limit
+ * matching all characters except s.charAt(0). If limit < start,
+ * go backward starting from start-1 matching all characters
+ * except s.charAt(s.length()-1). This method assumes that the
+ * first character, text.charAt(start), matches s, so it does not
+ * check it.
+ * @param text the text to match
+ * @param start the first character to match. In the forward
+ * direction, text.charAt(start) is matched against s.charAt(0).
+ * In the reverse direction, it is matched against
+ * s.charAt(s.length()-1).
+ * @param limit the limit offset for matching, either last+1 in
+ * the forward direction, or last-1 in the reverse direction,
+ * where last is the index of the last character to match.
+ * @return If part of s matches up to the limit, return |limit -
+ * start|. If all of s matches before reaching the limit, return
+ * s.length(). If there is a mismatch between s and text, return
+ * 0
+ */
+ static int32_t matchRest(const Replaceable& text,
+ int32_t start, int32_t limit,
+ const UnicodeString& s);
+
+ /**
+ * Returns the smallest value i such that c < list[i]. Caller
+ * must ensure that c is a legal value or this method will enter
+ * an infinite loop. This method performs a binary search.
+ * @param c a character in the range MIN_VALUE..MAX_VALUE
+ * inclusive
+ * @return the smallest integer i in the range 0..len-1,
+ * inclusive, such that c < list[i]
+ */
+ int32_t findCodePoint(UChar32 c) const;
+
+public:
+
+ /**
+ * Implementation of UnicodeMatcher API. Union the set of all
+ * characters that may be matched by this object into the given
+ * set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
+ /**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * charAt().
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 2.4
+ */
+ int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * indexOf().
+ * @param index an index from 0..size()-1
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 2.4
+ */
+ UChar32 charAt(int32_t index) const;
+
+ /**
+ * Adds the specified range to this set if it is not already
+ * present. If this set already contains the specified range,
+ * the call leaves this set unchanged. If start > end
+ * then an empty range is added, leaving the set unchanged.
+ * This is equivalent to a boolean logic OR, or a set UNION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be added
+ * to this set.
+ * @param end last character, inclusive, of range to be added
+ * to this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& add(UChar32 start, UChar32 end);
+
+ /**
+ * Adds the specified character to this set if it is not already
+ * present. If this set already contains the specified character,
+ * the call leaves this set unchanged.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& add(UChar32 c);
+
+ /**
+ * Adds the specified multicharacter to this set if it is not already
+ * present. If this set already contains the multicharacter,
+ * the call leaves this set unchanged.
+ * Thus "ch" => {"ch"}
+ *
+ * Warning: you cannot add an empty string ("") to a UnicodeSet.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& add(const UnicodeString& s);
+
+ private:
+ /**
+ * @return a code point IF the string consists of a single one.
+ * otherwise returns -1.
+ * @param s string to test
+ */
+ static int32_t getSingleCP(const UnicodeString& s);
+
+ void _add(const UnicodeString& s);
+
+ public:
+ /**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& addAll(const UnicodeString& s);
+
+ /**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& retainAll(const UnicodeString& s);
+
+ /**
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * Each character is removed if this set already contains it, or added if it does not.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complementAll(const UnicodeString& s);
+
+ /**
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * If this set does not contain a particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& removeAll(const UnicodeString& s);
+
+ /**
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+ *
+ * Warning: you cannot add an empty string ("") to a UnicodeSet.
+ * @param s the source string
+ * @return a newly created set containing the given string.
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
+
+
+ /**
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+ * @param s the source string
+ * @return a newly created set containing the given characters
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
+
+ /**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If start > end then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retain(UChar32 start, UChar32 end);
+
+
+ /**
+ * Retain the specified character from this set if it is present.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& retain(UChar32 c);
+
+ /**
+ * Removes the specified range from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns. If start > end then an empty range is
+ * removed, leaving the set unchanged.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be removed
+ * from this set.
+ * @param end last character, inclusive, of range to be removed
+ * from this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& remove(UChar32 start, UChar32 end);
+
+ /**
+ * Removes the specified character from this set if it is present.
+ * The set will not contain the specified character once the call
+ * returns.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& remove(UChar32 c);
+
+ /**
+ * Removes the specified string from this set if it is present.
+ * The set will not contain the specified string once the call
+ * returns.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& remove(const UnicodeString& s);
+
+ /**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This is equivalent to
+ * complement(MIN_VALUE, MAX_VALUE).
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement(void);
+
+ /**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If start > end
+ * then an empty range is complemented, leaving the set unchanged.
+ * This is equivalent to a boolean logic XOR.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be complemented
+ * in this set.
+ * @param end last character, inclusive, of range to be complemented
+ * in this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement(UChar32 start, UChar32 end);
+
+ /**
+ * Complements the specified character in this set. The character
+ * will be removed if it is in this set, or will be added if it is
+ * not in this set.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& complement(UChar32 c);
+
+ /**
+ * Complement the specified string in this set.
+ * The string will be removed if it is in this set, or will be
+ * added if it is not in this set.
+ *
+ * Warning: you cannot add an empty string ("") to a UnicodeSet.
+ * A frozen set will not be modified.
+ * @param s the string to complement
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complement(const UnicodeString& s);
+
+ /**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the union of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param c set whose elements are to be added to this set.
+ * @see #add(UChar32, UChar32)
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& addAll(const UnicodeSet& c);
+
+ /**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the intersection of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements this set will retain.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retainAll(const UnicodeSet& c);
+
+ /**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the asymmetric set difference of
+ * the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be removed from
+ * this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& removeAll(const UnicodeSet& c);
+
+ /**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be xor'ed from
+ * this set.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& complementAll(const UnicodeSet& c);
+
+ /**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& clear(void);
+
+ /**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param attribute bitmask for attributes to close over.
+ * Currently only the USET_CASE bit is supported. Any undefined bits
+ * are ignored.
+ * @return a reference to this set.
+ * @internal
+ */
+ UnicodeSet& closeOver(int32_t attribute);
+
+ /**
+ * Remove all strings from this set.
+ *
+ * @return a reference to this set.
+ * @internal
+ */
+ virtual UnicodeSet &removeAllStrings();
+
+ /**
+ * Iteration method that returns the number of ranges contained in
+ * this set.
+ * @see #getRangeStart
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual int32_t getRangeCount(void) const;
+
+ /**
+ * Iteration method that returns the first character in the
+ * specified range of this set.
+ * @see #getRangeCount
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeStart(int32_t index) const;
+
+ /**
+ * Iteration method that returns the last character in the
+ * specified range of this set.
+ * @see #getRangeStart
+ * @see #getRangeCount
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeEnd(int32_t index) const;
+
+ /**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array has following format (each line is one 16-bit
+ * integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
+ * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
+ * n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+ int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
+
+ /**
+ * Reallocate this object's internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& compact();
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ *
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ *
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Implement UnicodeFunctor API.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+private:
+
+ // Private API for the USet API
+
+ friend class USetAccess;
+
+ int32_t getStringCount() const;
+
+ const UnicodeString* getString(int32_t index) const;
+
+ //----------------------------------------------------------------
+ // RuleBasedTransliterator support
+ //----------------------------------------------------------------
+
+private:
+
+ /**
+ * Returns true if this set contains any character whose low byte
+ * is the given value. This is used by RuleBasedTransliterator for
+ * indexing.
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const;
+
+private:
+
+ //----------------------------------------------------------------
+ // Implementation: Clone as thawed (see ICU4J Freezable)
+ //----------------------------------------------------------------
+
+ UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
+
+ //----------------------------------------------------------------
+ // Implementation: Pattern parsing
+ //----------------------------------------------------------------
+
+ void applyPattern(RuleCharacterIterator& chars,
+ const SymbolTable* symbols,
+ UnicodeString& rebuiltPat,
+ uint32_t options,
+ UErrorCode& ec);
+
+ //----------------------------------------------------------------
+ // Implementation: Utility methods
+ //----------------------------------------------------------------
+
+ void ensureCapacity(int32_t newLen);
+
+ void ensureBufferCapacity(int32_t newLen);
+
+ void swapBuffers(void);
+
+ UBool allocateStrings(UErrorCode &status);
+
+ UnicodeString& _toPattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ UnicodeString& _generatePattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
+
+ static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
+
+ //----------------------------------------------------------------
+ // Implementation: Fundamental operators
+ //----------------------------------------------------------------
+
+ void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void add(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a property set pattern [:foo:], \\p{foo}, or
+ * \\P{foo}, or \\N{name}.
+ */
+ static UBool resemblesPropertyPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+ int32_t iterOpts);
+
+ /**
+ * Parse the given property pattern at the given parse position
+ * and set this UnicodeSet to the result.
+ *
+ * The original design document is out of date, but still useful.
+ * Ignore the property and value names:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
+ *
+ * Recognized syntax:
+ *
+ * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
+ * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
+ * \\N{name} - white space not allowed within "\\N"
+ *
+ * Other than the above restrictions, white space is ignored. Case
+ * is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
+ * and trailing space is deleted, and internal runs of whitespace
+ * are collapsed to a single space.
+ *
+ * We support binary properties, enumerated properties, and the
+ * following non-enumerated properties:
+ *
+ * Numeric_Value
+ * Name
+ * Unicode_1_Name
+ *
+ * @param pattern the pattern string
+ * @param ppos on entry, the position at which to begin parsing.
+ * This should be one of the locations marked '^':
+ *
+ * [:blah:] \\p{blah} \\P{blah} \\N{name}
+ * ^ % ^ % ^ % ^ %
+ *
+ * On return, the position after the last character parsed, that is,
+ * the locations marked '%'. If the parse fails, ppos is returned
+ * unchanged.
+ * @return a reference to this.
+ */
+ UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
+ ParsePosition& ppos,
+ UErrorCode &ec);
+
+ void applyPropertyPattern(RuleCharacterIterator& chars,
+ UnicodeString& rebuiltPat,
+ UErrorCode& ec);
+
+ /**
+ * A filter that returns TRUE if the given code point should be
+ * included in the UnicodeSet being constructed.
+ */
+ typedef UBool (*Filter)(UChar32 codePoint, void* context);
+
+ /**
+ * Given a filter, set this UnicodeSet to the code points
+ * contained by that filter. The filter MUST be
+ * property-conformant. That is, if it returns value v for one
+ * code point, then it must return v for all affiliated code
+ * points, as defined by the inclusions list. See
+ * getInclusions().
+ * src is a UPropertySource value.
+ */
+ void applyFilter(Filter filter,
+ void* context,
+ int32_t src,
+ UErrorCode &status);
+
+ /**
+ * Set the new pattern to cache.
+ */
+ void setPattern(const UnicodeString& newPat);
+ /**
+ * Release existing cached pattern.
+ */
+ void releasePattern();
+
+ friend class UnicodeSetIterator;
+};
+
+inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
+ return !operator==(o);
+}
+
+inline UBool UnicodeSet::isFrozen() const {
+ return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
+}
+
+inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
+ return !containsNone(start, end);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
+ return !containsNone(s);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
+ return !containsNone(s);
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/unistr.h b/jni/EastAsianWidth/unicode/unistr.h
new file mode 100644
index 0000000..575998e
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/unistr.h
@@ -0,0 +1,4149 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 11/11/98 stephen Changed per 11/9 code review.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 11/18/99 aliu Made to inherit from Replaceable. Added method
+* handleReplaceBetween(); other methods unchanged.
+* 06/25/01 grhoten Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
+#include "unicode/rep.h"
+
+struct UConverter; // unicode/ucnv.h
+class StringThreadTest;
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also ustring.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+U_NAMESPACE_BEGIN
+
+class Locale; // unicode/locid.h
+class StringCharacterIterator;
+class BreakIterator; // unicode/brkiter.h
+
+/* The length of the string, not including the terminating NUL, must be
+ * specified as a constant. The U_STRING_DECL macro should be invoked exactly
+ * once for one such string variable before it is used.
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
+#else
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * similar functionality as the Java String and StringBuffer classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * text, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator> (const UnicodeString& text) const;
+
+ /**
+ * Less than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than the characters in text, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator< (const UnicodeString& text) const;
+
+ /**
+ * Greater than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * greater than or equal to the characters in text, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator>= (const UnicodeString& text) const;
+
+ /**
+ * Less than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than or equal to the characters in text, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator<= (const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString to
+ * the characters in text.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as text, -1 if the characters in
+ * this are bitwise less than the characters in text, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in text.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [start, start + length) with the characters
+ * in text
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param text the other text to be compared against this string.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as text, -1 if the characters in
+ * this are bitwise less than the characters in text, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in text.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [start, start + length) with the characters
+ * in srcText in the range
+ * [srcStart, srcStart + srcLength).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into srcText to start comparison
+ * @param srcLength the number of characters in srcText to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as srcText, -1 if the characters in
+ * this are bitwise less than the characters in srcText, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in srcText.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString with the first
+ * srcLength characters in srcChars.
+ * @param srcChars The characters to compare to this UnicodeString.
+ * @param srcLength the number of characters in srcChars to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as srcChars, -1 if the characters in
+ * this are bitwise less than the characters in srcChars, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in srcChars.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [start, start + length) with the first
+ * length characters in srcChars
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as srcChars, -1 if the characters in
+ * this are bitwise less than the characters in srcChars, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in srcChars.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [start, start + length) with the characters
+ * in srcChars in the range
+ * [srcStart, srcStart + srcLength).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into srcChars to start comparison
+ * @param srcLength the number of characters in srcChars to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as srcChars, -1 if the characters in
+ * this are bitwise less than the characters in srcChars, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in srcChars.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [start, limit) with the characters
+ * in srcText in the range
+ * [srcStart, srcLimit).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into srcText to start comparison
+ * @param srcLimit the offset into srcText to limit comparison
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as srcText, -1 if the characters in
+ * this are bitwise less than the characters in srcText, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in srcText.
+ * @stable ICU 2.0
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UChar *srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in text
+ * @param text The text to match.
+ * @return TRUE if this starts with the characters in text,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this starts with the characters in srcText
+ * in the range [srcStart, srcStart + srcLength).
+ * @param srcText The text to match.
+ * @param srcStart the offset into srcText to start matching
+ * @param srcLength the number of characters in srcText to match
+ * @return TRUE if this starts with the characters in text,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in srcChars
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in srcChars
+ * @return TRUE if this starts with the characters in srcChars,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in srcChars
+ * in the range [srcStart, srcStart + srcLength).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into srcChars to start matching
+ * @param srcLength the number of characters in srcChars to match
+ * @return TRUE if this starts with the characters in srcChars, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in text
+ * @param text The text to match.
+ * @return TRUE if this ends with the characters in text,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this ends with the characters in srcText
+ * in the range [srcStart, srcStart + srcLength).
+ * @param srcText The text to match.
+ * @param srcStart the offset into srcText to start matching
+ * @param srcLength the number of characters in srcText to match
+ * @return TRUE if this ends with the characters in text,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in srcChars
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in srcChars
+ * @return TRUE if this ends with the characters in srcChars,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in srcChars
+ * in the range [srcStart, srcStart + srcLength).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into srcChars to start matching
+ * @param srcLength the number of characters in srcChars to match
+ * @return TRUE if this ends with the characters in srcChars,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in text,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in text
+ * starting at offset start, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [start, start + length) of the characters
+ * in text, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [start, start + length) of the characters
+ * in srcText in the range
+ * [srcStart, srcStart + srcLength),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into srcText at which
+ * to start matching
+ * @param srcLength the number of characters in srcText to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * srcChars
+ * starting at offset start, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in srcChars to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [start, start + length) of the characters
+ * in srcChars, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in srcChars
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of srcChars,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [start, start + length) of the characters
+ * in srcChars in the range
+ * [srcStart, srcStart + srcLength),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into srcChars at which
+ * to start matching
+ * @param srcLength the number of characters in srcChars to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t indexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point c,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point c,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point c,
+ * starting at offset start, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point c
+ * starting at offset start, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point c
+ * in the range [start, start + length),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point c
+ * in the range [start, start + length),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in text,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in text
+ * starting at offset start, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [start, start + length) of the characters
+ * in text, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [start, start + length) of the characters
+ * in srcText in the range
+ * [srcStart, srcStart + srcLength),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into srcText at which
+ * to start matching
+ * @param srcLength the number of characters in srcText to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in srcChars
+ * starting at offset start, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in srcChars to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [start, start + length) of the characters
+ * in srcChars, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in srcChars
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of srcChars,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [start, start + length) of the characters
+ * in srcChars in the range
+ * [srcStart, srcStart + srcLength),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into srcChars at which
+ * to start matching
+ * @param srcLength the number of characters in srcChars to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of text,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point c,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point c,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point c
+ * starting at offset start, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point c
+ * starting at offset start, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point c
+ * in the range [start, start + length),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point c
+ * in the range [start, start + length),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of c, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset offset.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset offset
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset offset.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset offset
+ * @stable ICU 2.0
+ */
+ inline UChar operator[] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset offset.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @return the code point of text at offset
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to a second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ * @see U16_SET_CP_START
+ * @stable ICU 2.0
+ */
+ inline int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the code unit after the pair).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.0
+ */
+ inline int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Behavior for out-of-bounds indexes:
+ * moveIndex32 pins the input index to 0..length(), i.e.,
+ * if the input index<0 then it is pinned to 0;
+ * if it is index>length() then it is pinned to length().
+ * Afterwards, the index is moved by delta code points
+ * forward or backward,
+ * but no further backward than to 0 and no further forward than to length().
+ * The resulting index return value will be in between 0 and length(), inclusively.
+ *
+ * Examples:
+ *
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ *
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ * @stable ICU 2.0
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction */
+
+ /**
+ * Copy the characters in the range
+ * [start, start + length) into the array dst,
+ * beginning at dstStart.
+ * If the string aliases to dst itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of dst
+ * must be at least (dstStart + length).
+ * @param dstStart the offset in dst where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dest.
+ * This is a convenience function that
+ * checks if there is enough space in dest,
+ * extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated
+ * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest
+ * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to dest itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dest Destination string buffer.
+ * @param destCapacity Number of UChars available at dest.
+ * @param errorCode ICU error code.
+ * @return length()
+ * @stable ICU 2.0
+ */
+ int32_t
+ extract(UChar *dest, int32_t destCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [start, start + length) into the UnicodeString
+ * target.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param target UnicodeString into which to copy characters;
+ * this is the only output, since the function returns void.
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range [start, limit)
+ * into the array dst, beginning at dstStart.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of dst
+ * must be at least (dstStart + (limit - start)).
+ * @param dstStart the offset in dst where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [start, limit)
+ * into the UnicodeString target. Replaceable API.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param target UnicodeString into which to copy characters;
+ * this is the only output, since the function returns void.
+ * @stable ICU 2.0
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range
+ * [start, start + startLength) into an array of characters.
+ * All characters must be invariant (see utypes.h).
+ * Use US_INV as the last, signature-distinguishing parameter.
+ *
+ * This function does not write any more than targetCapacity
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction, can be NULL
+ * if targetCapacity is 0
+ * @param targetCapacity the length of the target buffer
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 3.2
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ int32_t targetCapacity,
+ enum EInvariant inv) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [start, start + startLength) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If codepage is an empty string (""),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If target is NULL, then the number of bytes required for
+ * target is returned. It is assumed that the target is big enough
+ * to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [start, start + startLength) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than targetLength
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If codepage is an empty string (""),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If target is NULL, then the number of bytes required for
+ * target is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage = 0) const;
+
+ /**
+ * Convert the UnicodeString into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable ICU 2.0
+ */
+ int32_t extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+#endif
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeString object.
+ * The length is the number of UChar code units that are in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
+ * @return the length of the UnicodeString object
+ * @see countChar32
+ * @stable ICU 2.0
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of UChar code units to check
+ * @return the number of code points in the specified code units
+ * @see length
+ * @stable ICU 2.0
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Check if the length UChar code units of the string
+ * contain more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in this part of the string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length
+ * falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param start the index of the first code unit to check (0 for the entire string)
+ * @param length the number of UChar code units to check
+ * (use INT32_MAX for the entire string; remember that start/length
+ * values are pinned)
+ * @param number The number of code points in the (sub)string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @see countChar32
+ * @see u_strHasMoreChar32Than
+ * @stable ICU 2.4
+ */
+ UBool
+ hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return TRUE if this string contains 0 characters, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool isEmpty(void) const;
+
+ /**
+ * Return the capacity of the internal buffer of the UnicodeString object.
+ * This is useful together with the getBuffer functions.
+ * See there for details.
+ *
+ * @return the number of UChars available in the internal buffer
+ * @see getBuffer
+ * @stable ICU 2.0
+ */
+ inline int32_t getCapacity(void) const;
+
+ /* Other operations */
+
+ /**
+ * Generate a hash code for this object.
+ * @return The hash code of this UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline int32_t hashCode(void) const;
+
+ /**
+ * Determine if this object contains a valid string.
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * @return TRUE if the string is valid, FALSE otherwise
+ * @see setToBogus()
+ * @stable ICU 2.0
+ */
+ inline UBool isBogus(void) const;
+
+
+ //========================================
+ // Write operations
+ //========================================
+
+ /* Assignment operations */
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the characters from srcText.
+ * @param srcText The text containing the characters to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &operator=(const UnicodeString &srcText);
+
+ /**
+ * Almost the same as the assignment operator.
+ * Replace the characters in this UnicodeString
+ * with the characters from srcText.
+ *
+ * This function works the same for all strings except for ones that
+ * are readonly aliases.
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * This function implements the old, more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ * The fastCopyFrom function must be used only if it is known that the lifetime of
+ * this UnicodeString is at least as long as the lifetime of the aliased buffer
+ * including its contents, for example for strings from resource bundles
+ * or aliases to string contents.
+ *
+ * @param src The text containing the characters to replace.
+ * @return a reference to this
+ * @stable ICU 2.4
+ */
+ UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code unit ch.
+ * @param ch the code unit to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar ch);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code point ch.
+ * @param ch the code point to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar32 ch);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in srcText in the range
+ * [srcStart, srcText.length()).
+ * srcText is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @return a reference to this
+ * @stable ICU 2.2
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in srcText in the range
+ * [srcStart, srcStart + srcLength).
+ * srcText is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcText in the
+ * replace string.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Set the text in the UnicodeString object to the characters in
+ * srcText.
+ * srcText is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeString object to the characters
+ * in srcChars. srcChars is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Set the characters in the UnicodeString object to the code unit
+ * srcChar.
+ * @param srcChar the code unit which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar srcChar);
+
+ /**
+ * Set the characters in the UnicodeString object to the code point
+ * srcChar.
+ * @param srcChar the code point which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar32 srcChar);
+
+ /**
+ * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ * In an assignment to another UnicodeString (copy constructor or assignment
+ * operator), the text will be copied. With fastCopyFrom(), the text will be
+ * aliased again, so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if text is NUL-terminated.
+ * This must be true if textLength==-1.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in text to alias.
+ * If -1, then this function will determine the length
+ * by calling u_strlen().
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in buffer to alias.
+ * @param buffCapacity The size of buffer in UChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UChar *buffer,
+ int32_t buffLength,
+ int32_t buffCapacity);
+
+ /**
+ * Make this UnicodeString object invalid.
+ * The string will test TRUE with isBogus().
+ *
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * This utility function is used throughout the UnicodeString
+ * implementation to indicate that a UnicodeString operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * The following methods, and no others, will clear a string object's bogus flag:
+ * - remove()
+ * - remove(0, INT32_MAX)
+ * - truncate(0)
+ * - operator=() (assignment operator)
+ * - setTo(...)
+ *
+ * The simplest way to turn a bogus string into an empty one
+ * is to use the remove() function.
+ * Examples for other functions that are equivalent to "set to empty string":
+ * \code
+ * if(s.isBogus()) {
+ * s.remove(); // set to an empty string (remove all), or
+ * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+ * s.truncate(0); // set to an empty string (complete truncation), or
+ * s=UnicodeString(); // assign an empty string, or
+ * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+ * static const UChar nul=0;
+ * s.setTo(&nul, 0); // set to an empty C Unicode string
+ * }
+ * \endcode
+ *
+ * @see isBogus()
+ * @stable ICU 2.0
+ */
+ void setToBogus();
+
+ /**
+ * Set the character at the specified offset to the specified character.
+ * @param offset A valid offset into the text of the character to set
+ * @param ch The new character
+ * @return A reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setCharAt(int32_t offset,
+ UChar ch);
+
+
+ /* Append operations */
+
+ /**
+ * Append operator. Append the code unit ch to the UnicodeString
+ * object.
+ * @param ch the code unit to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar ch);
+
+ /**
+ * Append operator. Append the code point ch to the UnicodeString
+ * object.
+ * @param ch the code point to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar32 ch);
+
+ /**
+ * Append operator. Append the characters in srcText to the
+ * UnicodeString object. srcText is
+ * not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+ /**
+ * Append the characters
+ * in srcText in the range
+ * [srcStart, srcStart + srcLength) to the
+ * UnicodeString object. srcText
+ * is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcText in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in srcText to the UnicodeString object.
+ * srcText is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText);
+
+ /**
+ * Append the characters in srcChars in the range
+ * [srcStart, srcStart + srcLength) to the UnicodeString
+ * object.
+ * srcChars is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into srcChars where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcChars in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in srcChars to the UnicodeString object.
+ * srcChars is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Append the code unit srcChar to the UnicodeString object.
+ * @param srcChar the code unit to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(UChar srcChar);
+
+ /**
+ * Append the code point srcChar to the UnicodeString object.
+ * @param srcChar the code point to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(UChar32 srcChar);
+
+
+ /* Insert operations */
+
+ /**
+ * Insert the characters in srcText in the range
+ * [srcStart, srcStart + srcLength) into the UnicodeString
+ * object at offset start. srcText is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcText in
+ * the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in srcText into the UnicodeString object
+ * at offset start. srcText is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText);
+
+ /**
+ * Insert the characters in srcChars in the range
+ * [srcStart, srcStart + srcLength) into the UnicodeString
+ * object at offset start. srcChars is not modified.
+ * @param start the offset at which the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into srcChars where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcChars
+ * in the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in srcChars into the UnicodeString object
+ * at offset start. srcChars is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Insert the code unit srcChar into the UnicodeString object at
+ * offset start.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code unit to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar srcChar);
+
+ /**
+ * Insert the code point srcChar into the UnicodeString object at
+ * offset start.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code point to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar32 srcChar);
+
+
+ /* Replace operations */
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length) with the characters in
+ * srcText in the range
+ * [srcStart, srcStart + srcLength).
+ * srcText is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcText in
+ * the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length)
+ * with the characters in srcText. srcText is
+ * not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length) with the characters in
+ * srcChars in the range
+ * [srcStart, srcStart + srcLength). srcChars
+ * is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into srcChars where new characters
+ * will be obtained
+ * @param srcLength the number of characters in srcChars
+ * in the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length) with the characters in
+ * srcChars. srcChars is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length) with the code unit
+ * srcChar.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcChar the new code unit
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ UChar srcChar);
+
+ /**
+ * Replace the characters in the range
+ * [start, start + length) with the code point
+ * srcChar.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * start + length is not modified.
+ * @param srcChar the new code point
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ UChar32 srcChar);
+
+ /**
+ * Replace the characters in the range [start, limit)
+ * with the characters in srcText. srcText is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range [start, limit)
+ * with the characters in srcText in the range
+ * [srcStart, srcLimit). srcText is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into srcText where new characters
+ * will be obtained
+ * @param srcLimit the offset immediately following the range to copy
+ * in srcText
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; 0 <= start
+ * <= limit.
+ * @param limit the ending index, exclusive; start <= limit
+ * <= length().
+ * @param text the text to replace characters start
+ * to limit - 1
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text);
+
+ /**
+ * Replaceable API
+ * @return TRUE if it has MetaData
+ * @stable ICU 2.4
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Copy a substring of this object, retaining attribute (out-of-band)
+ * information. This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; 0 <= start <=
+ * limit.
+ * @param limit the ending index, exclusive; start <= limit <=
+ * length().
+ * @param dest the destination index. The characters from
+ * start..limit-1 will be copied to dest.
+ * Implementations of this method may assume that dest <= start ||
+ * dest >= limit.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+ /* Search and replace operations */
+
+ /**
+ * Replace all occurrences of characters in oldText with the characters
+ * in newText
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText with characters
+ * in newText
+ * in the range [start, start + length).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText in the range
+ * [oldStart, oldStart + oldLength) with the characters
+ * in newText in the range
+ * [newStart, newStart + newLength)
+ * in the range [start, start + length).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param oldStart the start of the search range in oldText
+ * @param oldLength the length of the search range in oldText
+ * @param newText the text containing the replacement text
+ * @param newStart the start of the replacement range in newText
+ * @param newLength the length of the replacement range in newText
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ int32_t oldStart,
+ int32_t oldLength,
+ const UnicodeString& newText,
+ int32_t newStart,
+ int32_t newLength);
+
+
+ /* Remove operations */
+
+ /**
+ * Remove all characters from the UnicodeString object.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(void);
+
+ /**
+ * Remove the characters in the range
+ * [start, start + length) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param length the number of characters to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(int32_t start,
+ int32_t length = (int32_t)INT32_MAX);
+
+ /**
+ * Remove the characters in the range
+ * [start, limit) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param limit the offset immediately following the range to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& removeBetween(int32_t start,
+ int32_t limit = (int32_t)INT32_MAX);
+
+
+ /* Length operations */
+
+ /**
+ * Pad the start of this UnicodeString with the character padChar.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * beginning of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padLeading(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Pad the end of this UnicodeString with the character padChar.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * end of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padTrailing(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Truncate this UnicodeString to the targetLength.
+ * @param targetLength the desired length of this UnicodeString.
+ * @return TRUE if the text was truncated, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool truncate(int32_t targetLength);
+
+ /**
+ * Trims leading and trailing whitespace from this UnicodeString.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& trim(void);
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Reverse this UnicodeString in place.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(void);
+
+ /**
+ * Reverse the range [start, start + length) in
+ * this UnicodeString.
+ * @param start the start of the range to reverse
+ * @param length the number of characters to reverse
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(int32_t start,
+ int32_t length);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(void);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(const Locale& locale);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(void);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecase this string, convenience function using the default locale.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter);
+
+ /**
+ * Titlecase this string.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, see ucasemap_open().
+ * @return A reference to this.
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @see ucasemap_open
+ * @draft ICU 3.8
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+ /**
+ * Case-fold the characters in this string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+ //========================================
+ // Access to the internal buffer
+ //========================================
+
+ /**
+ * Get a read/write pointer to the internal buffer.
+ * The buffer is guaranteed to be large enough for at least minCapacity UChars,
+ * writable, and is still owned by the UnicodeString object.
+ * Calls to getBuffer(minCapacity) must not be nested, and
+ * must be matched with calls to releaseBuffer(newLength).
+ * If the string buffer was read-only or shared,
+ * then it will be reallocated and copied.
+ *
+ * An attempted nested call will return 0, and will not further modify the
+ * state of the UnicodeString object.
+ * It also returns 0 if the string is bogus.
+ *
+ * The actual capacity of the string buffer may be larger than minCapacity.
+ * getCapacity() returns the actual capacity.
+ * For many operations, the full capacity should be used to avoid reallocations.
+ *
+ * While the buffer is "open" between getBuffer(minCapacity)
+ * and releaseBuffer(newLength), the following applies:
+ * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+ * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+ * - You can read from and write to the returned buffer.
+ * - The previous string contents will still be in the buffer;
+ * if you want to use it, then you need to call length() before getBuffer(minCapacity).
+ * If the length() was greater than minCapacity, then any contents after minCapacity
+ * may be lost.
+ * The buffer contents is not NUL-terminated by getBuffer().
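+ * If length()<getCapacity() then you can terminate it by writing a NUL
+ * at index length().
+ * @param isTerminated specifies if text is NUL-terminated.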
+ * This must be true if textLength==-1.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in text to alias.
+ * If -1, then this constructor will determine the length
+ * by calling u_strlen().
+ * @stable ICU 2.0
+ */
+ UnicodeString(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in buffer to alias.
+ * @param buffCapacity The size of buffer in UChars.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes, null-terminated
+ * @param codepage the encoding of codepageData. The special
+ * value 0 for codepage indicates that the text is in the
+ * platform's default codepage.
+ *
+ * If codepage is an empty string (""),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData,
+ const char *codepage = 0);
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes.
+ * @param dataLength The number of bytes in codepageData.
+ * @param codepage the encoding of codepageData. The special
+ * value 0 for codepage indicates that the text is in the
+ * platform's default codepage.
+ * If codepage is an empty string (""),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage = 0);
+
+ /**
+ * char * / UConverter constructor.
+ * This constructor uses an existing UConverter object to
+ * convert the codepage string to Unicode and construct a UnicodeString
+ * from that.
+ *
+ * The converter is reset at first.
+ * If the error code indicates a failure before this constructor is called,
+ * or if an error occurs during conversion or construction,
+ * then the string will be bogus.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are constructed.
+ *
+ * @param src input codepage string
+ * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+ * @param cnv converter object (ucnv_resetToUnicode() will be called),
+ * can be NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @stable ICU 2.0
+ */
+ UnicodeString(
+ const char *src, int32_t srcLength,
+ UConverter *cnv,
+ UErrorCode &errorCode);
+
+#endif
+
+ /**
+ * Constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * Use the macro US_INV as the third, signature-distinguishing parameter.
+ *
+ * For example:
+ * \code
+ * void fn(const char *s) {
+ * UnicodeString ustr(s, -1, US_INV);
+ * // use ustr ...
+ * }
+ * \endcode
+ *
+ * @param src String using only invariant characters.
+ * @param length Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
+
+ /**
+ * Copy constructor.
+ * @param that The UnicodeString object to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const UnicodeString& that);
+
+ /**
+ * 'Substring' constructor from tail of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into src at which to start copying.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+ /**
+ * 'Substring' constructor from subrange of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into src at which to start copying.
+ * @param srcLength The number of characters from src to copy.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see Replaceable::clone
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeString();
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Unescape a string of characters and return a string containing
+ * the result. The following escape sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * This function is similar to u_unescape() but not identical to it.
+ * The latter takes a source char*, so it does escape recognition
+ * and also invariant conversion.
+ *
+ * @return a string with backslash escapes interpreted, or an
+ * empty string on error.
+ * @see UnicodeString#unescapeAt()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UnicodeString unescape() const;
+
+ /**
+ * Unescape a single escape sequence and return the represented
+ * character. See unescape() for a listing of the recognized escape
+ * sequences. The character at offset-1 is assumed (without
+ * checking) to be a backslash. If the escape sequence is
+ * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
+ * returned.
+ *
+ * @param offset an input output parameter. On input, it is the
+ * offset into this string where the escape sequence is located,
+ * after the initial backslash. On output, it is advanced after the
+ * last character parsed. On error, it is not advanced at all.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see UnicodeString#unescape()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UChar32 unescapeAt(int32_t &offset) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ //========================================
+ // Implementation methods
+ //========================================
+
+protected:
+ /**
+ * Implement Replaceable::getLength() (see jitterbug 1027).
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const;
+
+ /**
+ * The change in Replaceable to use virtual getCharAt() allows
+ * UnicodeString::charAt() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const;
+
+ /**
+ * The change in Replaceable to use virtual getChar32At() allows
+ * UnicodeString::char32At() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+
+ inline int8_t
+ doCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int32_t doIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ void doExtract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart) const;
+
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ inline UChar doCharAt(int32_t offset) const;
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReverse(int32_t start,
+ int32_t length);
+
+ // calculate hash code
+ int32_t doHashCode(void) const;
+
+ // get pointer to start of array
+ inline UChar* getArrayStart(void);
+ inline const UChar* getArrayStart(void) const;
+
+ // allocate the array; result may be fStackBuffer
+ // sets refCount to 1 if appropriate
+ // sets fArray, fCapacity, and fFlags
+ // returns boolean for success or failure
+ UBool allocate(int32_t capacity);
+
+ // release the array if owned
+ void releaseArray(void);
+
+ // turn a bogus string into an empty one
+ void unBogus();
+
+ // implements assignment operator, copy constructor, and fastCopyFrom()
+ // (fastCopy defaults to FALSE)
+ UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+
+ // Pin start and limit to acceptable values.
+ inline void pinIndex(int32_t& start) const;
+ inline void pinIndices(int32_t& start,
+ int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /* Internal extract() using UConverter. */
+ int32_t doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /*
+ * Real constructor for converting from codepage data.
+ * It assumes that it is called with !fRefCounted.
+ *
+ * If codepage==0, then the default converter
+ * is used for the platform encoding.
+ * If codepage is an empty string (""),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ */
+ void doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage);
+
+ /*
+ * Worker function for creating a UnicodeString from
+ * a codepage string using a UConverter.
+ */
+ void
+ doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status);
+
+#endif
+
+ /*
+ * This function is called when write access to the array
+ * is necessary.
+ *
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ *
+ * Return FALSE if memory could not be allocated.
+ */
+ UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+ int32_t growCapacity = -1,
+ UBool doCopyArray = TRUE,
+ int32_t **pBufferToDelete = 0,
+ UBool forceClone = FALSE);
+
+ // common function for case mappings
+ UnicodeString &
+ caseMap(BreakIterator *titleIter,
+ const char *locale,
+ uint32_t options,
+ int32_t toWhichCase);
+
+ // ref counting
+ void addRef(void);
+ int32_t removeRef(void);
+ int32_t refCount(void) const;
+
+ // constants
+ enum {
+ US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
+ kInvalidUChar=0xffff, // invalid UChar index
+ kGrowSize=128, // grow size for this buffer
+ kInvalidHashCode=0, // invalid hash code
+ kEmptyHashCode=1, // hash code for empty string
+
+ // bit flag values for fFlags
+ kIsBogus=1, // this string is bogus, i.e., not valid or NULL
+ kUsingStackBuffer=2,// fArray==fStackBuffer
+ kRefCounted=4, // there is a refCount field before the characters in fArray
+ kBufferIsReadonly=8,// do not write to this buffer
+ kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
+ // and releaseBuffer(newLength) must be called
+
+ // combined values for convenience
+ kShortString=kUsingStackBuffer,
+ kLongString=kRefCounted,
+ kReadonlyAlias=kBufferIsReadonly,
+ kWritableAlias=0
+ };
+
+ friend class StringCharacterIterator;
+ friend class StringThreadTest;
+
+ /*
+ * The following are all the class fields that are stored
+ * in each UnicodeString object.
+ * Note that UnicodeString has virtual functions,
+ * therefore there is an implicit vtable pointer
+ * as the first real field.
+ * The fields should be aligned such that no padding is
+ * necessary, mostly by having larger types first.
+ * On 32-bit machines, the size should be 32 bytes,
+ * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+ */
+ // (implicit) *vtable;
+ int32_t fLength; // number of characters in fArray
+ int32_t fCapacity; // sizeof fArray
+ UChar *fArray; // the Unicode data
+ uint16_t fFlags; // bit flags: see constants above
+ UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
+
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+// Clamp start in place: negative values become 0,
+// values beyond fLength become fLength.
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+    start = (start < 0) ? 0 : ((start > fLength) ? fLength : start);
+}
+
+// Clamp a (start, length) pair in place so that it describes a range
+// inside this string: start ends up in [0, fLength] and
+// _length in [0, fLength - start].
+inline void
+UnicodeString::pinIndices(int32_t& start,
+                          int32_t& _length) const
+{
+    // the start index is pinned first because the length limit depends on it
+    start = (start < 0) ? 0 : ((start > fLength) ? fLength : start);
+
+    const int32_t remaining = fLength - start;
+    _length = (_length < 0) ? 0 : ((_length > remaining) ? remaining : _length);
+}
+
+// Writable pointer to the start of the character array (fArray).
+inline UChar*
+UnicodeString::getArrayStart()
+{
+    return fArray;
+}
+
+// Read-only pointer to the start of the character array (fArray).
+inline const UChar*
+UnicodeString::getArrayStart() const
+{
+    return fArray;
+}
+
+//========================================
+// Read-only implementation methods
+//========================================
+// Returns the stored length field (fLength).
+inline int32_t
+UnicodeString::length() const
+{
+    return fLength;
+}
+
+// Returns the stored capacity field (fCapacity).
+inline int32_t
+UnicodeString::getCapacity() const
+{
+    return fCapacity;
+}
+
+// Public entry point for the hash code; the work is done by doHashCode().
+inline int32_t
+UnicodeString::hashCode() const
+{
+    return doHashCode();
+}
+
+// Tests the kIsBogus bit of fFlags.
+inline UBool
+UnicodeString::isBogus() const
+{
+    return (UBool)(fFlags & kIsBogus);
+}
+
+// Read-only access to the character array.
+// Returns 0 when the string is bogus or while a getBuffer(minCapacity)
+// call is still open (kOpenGetBuffer is set).
+inline const UChar *
+UnicodeString::getBuffer() const {
+    if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+        return 0;
+    }
+    return fArray;
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+// UnicodeString flavor of doCompare(): handles a bogus srcText, pins the
+// source range to srcText, and then hands srcText's array to the
+// UChar* flavor of doCompare().
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+                         int32_t length,
+                         const UnicodeString& srcText,
+                         int32_t srcStart,
+                         int32_t srcLength) const
+{
+    if(!srcText.isBogus()) {
+        srcText.pinIndices(srcStart, srcLength);
+        return doCompare(start, length, srcText.fArray, srcStart, srcLength);
+    }
+    // bogus source: 0 if both are bogus, 1 otherwise
+    return (int8_t)!isBogus();
+}
+
+// Equality: two bogus strings are equal; a bogus and a non-bogus string
+// are not; otherwise the lengths must match and doCompare() must return 0.
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+    if(isBogus()) {
+        return text.isBogus();
+    }
+    if(text.isBogus() || fLength != text.fLength) {
+        // cheap rejections before looking at the contents
+        return FALSE;
+    }
+    return doCompare(0, fLength, text, 0, text.fLength) == 0;
+}
+
+// Inequality is the negation of operator==.
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{
+    return !operator==(text);
+}
+
+// The relational operators all compare the two whole strings with
+// doCompare() and test its result against 1 or -1.
+
+// Greater-than: the comparison result is exactly 1.
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{
+    const int8_t order = doCompare(0, fLength, text, 0, text.fLength);
+    return order == 1;
+}
+
+// Less-than: the comparison result is exactly -1.
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{
+    const int8_t order = doCompare(0, fLength, text, 0, text.fLength);
+    return order == -1;
+}
+
+// Greater-or-equal: anything but -1.
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{
+    const int8_t order = doCompare(0, fLength, text, 0, text.fLength);
+    return order != -1;
+}
+
+// Less-or-equal: anything but 1.
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{
+    const int8_t order = doCompare(0, fLength, text, 0, text.fLength);
+    return order != 1;
+}
+
+// All compare() overloads are thin wrappers that fill in the missing
+// range arguments and forward to doCompare().
+
+// Whole string against whole string.
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{
+    return doCompare(0, fLength, text, 0, text.fLength);
+}
+
+// Range (start, _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText) const
+{
+    return doCompare(start, _length, srcText, 0, srcText.fLength);
+}
+
+// Whole string against srcLength units of srcChars.
+inline int8_t
+UnicodeString::compare(const UChar *srcChars,
+                       int32_t srcLength) const
+{
+    return doCompare(0, fLength, srcChars, 0, srcLength);
+}
+
+// Range of this string against a range of srcText.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength) const
+{
+    return doCompare(start, _length, srcText, srcStart, srcLength);
+}
+
+// Range of this string against srcChars; _length is used
+// as the source length as well.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UChar *srcChars) const
+{
+    return doCompare(start, _length, srcChars, 0, _length);
+}
+
+// Range of this string against a range of srcChars.
+inline int8_t
+UnicodeString::compare(int32_t start,
+                       int32_t _length,
+                       const UChar *srcChars,
+                       int32_t srcStart,
+                       int32_t srcLength) const
+{
+    return doCompare(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Like compare(), but both ranges are given as (start, limit) pairs;
+// they are converted to (start, length) for doCompare().
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart,
+                              int32_t srcLimit) const
+{
+    return doCompare(start, limit - start,
+                     srcText, srcStart, srcLimit - srcStart);
+}
+
+// UnicodeString flavor of doCompareCodePointOrder(): handles a bogus
+// srcText, pins the source range to srcText, and then hands srcText's
+// array to the UChar* flavor.
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t length,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+    if(!srcText.isBogus()) {
+        srcText.pinIndices(srcStart, srcLength);
+        return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
+    }
+    // bogus source: 0 if both are bogus, 1 otherwise
+    return (int8_t)!isBogus();
+}
+
+// All compareCodePointOrder() overloads fill in the missing range
+// arguments and forward to doCompareCodePointOrder().
+
+// Whole string against whole string.
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{
+    return doCompareCodePointOrder(0, fLength, text, 0, text.fLength);
+}
+
+// Range (start, _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText) const
+{
+    return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength);
+}
+
+// Whole string against srcLength units of srcChars.
+inline int8_t
+UnicodeString::compareCodePointOrder(const UChar *srcChars,
+                                     int32_t srcLength) const
+{
+    return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength);
+}
+
+// Range of this string against a range of srcText.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{
+    return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
+}
+
+// Range of this string against srcChars; _length is used
+// as the source length as well.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars) const
+{
+    return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
+}
+
+// Range of this string against a range of srcChars.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{
+    return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Like compareCodePointOrder(), but both ranges are given as
+// (start, limit) pairs; they are converted to (start, length).
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+                                            int32_t limit,
+                                            const UnicodeString& srcText,
+                                            int32_t srcStart,
+                                            int32_t srcLimit) const
+{
+    return doCompareCodePointOrder(start, limit - start,
+                                   srcText, srcStart, srcLimit - srcStart);
+}
+
+// UnicodeString flavor of doCaseCompare(): handles a bogus srcText,
+// pins the source range to srcText, and then hands srcText's array and
+// the unchanged options to the UChar* flavor.
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+                             int32_t length,
+                             const UnicodeString &srcText,
+                             int32_t srcStart,
+                             int32_t srcLength,
+                             uint32_t options) const
+{
+    if(!srcText.isBogus()) {
+        srcText.pinIndices(srcStart, srcLength);
+        return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
+    }
+    // bogus source: 0 if both are bogus, 1 otherwise
+    return (int8_t)!isBogus();
+}
+
+// All caseCompare() overloads fill in the missing range arguments and
+// forward, together with the options, to doCaseCompare().
+
+// Whole string against whole string.
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
+{
+    return doCaseCompare(0, fLength, text, 0, text.fLength, options);
+}
+
+// Range (start, _length) of this string against all of srcText.
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           uint32_t options) const
+{
+    return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
+}
+
+// Whole string against srcLength units of srcChars.
+inline int8_t
+UnicodeString::caseCompare(const UChar *srcChars,
+                           int32_t srcLength,
+                           uint32_t options) const
+{
+    return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
+}
+
+// Range of this string against a range of srcText.
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const
+{
+    return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+// Range of this string against srcChars; _length is used
+// as the source length as well.
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           uint32_t options) const
+{
+    return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+// Range of this string against a range of srcChars.
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const
+{
+    return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+// Like caseCompare(), but both ranges are given as (start, limit)
+// pairs; they are converted to (start, length) for doCaseCompare().
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+                                  int32_t limit,
+                                  const UnicodeString &srcText,
+                                  int32_t srcStart,
+                                  int32_t srcLimit,
+                                  uint32_t options) const
+{
+    return doCaseCompare(start, limit - start,
+                         srcText, srcStart, srcLimit - srcStart,
+                         options);
+}
+
+// Search for a range of srcText inside the range (start, _length) of
+// this string. Returns -1 without searching when srcText is bogus or
+// when the pinned source range is empty.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength,
+                       int32_t start,
+                       int32_t _length) const
+{
+    if(srcText.isBogus()) {
+        return -1;
+    }
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength <= 0) {
+        return -1;
+    }
+    return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Search for all of text in all of this string.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{
+    return indexOf(text, 0, text.fLength, 0, fLength);
+}
+
+// Search for all of text from start (pinned) to the end of this string.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+                       int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return indexOf(text, 0, text.fLength, start, remaining);
+}
+
+// Search for all of text in the range (start, _length) of this string.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+                       int32_t start,
+                       int32_t _length) const
+{
+    return indexOf(text, 0, text.fLength, start, _length);
+}
+
+// Search for srcLength units of srcChars from start (pinned)
+// to the end of this string.
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+                       int32_t srcLength,
+                       int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return indexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Search for srcLength units of srcChars in the range (start, _length).
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+                       int32_t srcLength,
+                       int32_t start,
+                       int32_t _length) const
+{
+    return indexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Search for the code unit c in the range (start, _length).
+inline int32_t
+UnicodeString::indexOf(UChar c,
+                       int32_t start,
+                       int32_t _length) const
+{
+    return doIndexOf(c, start, _length);
+}
+
+// Search for the code point c in the range (start, _length).
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+                       int32_t start,
+                       int32_t _length) const
+{
+    return doIndexOf(c, start, _length);
+}
+
+// Search for the code unit c in all of this string.
+inline int32_t
+UnicodeString::indexOf(UChar c) const
+{
+    return doIndexOf(c, 0, fLength);
+}
+
+// Search for the code point c in all of this string.
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{
+    return indexOf(c, 0, fLength);
+}
+
+// Search for the code unit c from start (pinned) to the end.
+inline int32_t
+UnicodeString::indexOf(UChar c,
+                       int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return doIndexOf(c, start, remaining);
+}
+
+// Search for the code point c from start (pinned) to the end.
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+                       int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return indexOf(c, start, remaining);
+}
+
+// Backward search for srcLength units of srcChars in the range
+// (start, _length) of this string.
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+                           int32_t srcLength,
+                           int32_t start,
+                           int32_t _length) const
+{
+    return lastIndexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Backward search for srcLength units of srcChars from start (pinned)
+// to the end of this string.
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+                           int32_t srcLength,
+                           int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return lastIndexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Backward search for a range of srcText inside the range
+// (start, _length) of this string. Returns -1 without searching when
+// srcText is bogus or when the pinned source range is empty.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           int32_t start,
+                           int32_t _length) const
+{
+    if(srcText.isBogus()) {
+        return -1;
+    }
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength <= 0) {
+        return -1;
+    }
+    return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Backward search for all of text in the range (start, _length).
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+                           int32_t start,
+                           int32_t _length) const
+{
+    return lastIndexOf(text, 0, text.fLength, start, _length);
+}
+
+// Backward search for all of text from start (pinned) to the end.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+                           int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return lastIndexOf(text, 0, text.fLength, start, remaining);
+}
+
+// Backward search for all of text in all of this string.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{
+    return lastIndexOf(text, 0, text.fLength, 0, fLength);
+}
+
+// Backward search for the code unit c in the range (start, _length).
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+                           int32_t start,
+                           int32_t _length) const
+{
+    return doLastIndexOf(c, start, _length);
+}
+
+// Backward search for the code point c in the range (start, _length).
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+                           int32_t start,
+                           int32_t _length) const
+{
+    return doLastIndexOf(c, start, _length);
+}
+
+// Backward search for the code unit c in all of this string.
+inline int32_t
+UnicodeString::lastIndexOf(UChar c) const
+{
+    return doLastIndexOf(c, 0, fLength);
+}
+
+// Backward search for the code point c in all of this string.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const
+{
+    return lastIndexOf(c, 0, fLength);
+}
+
+// Backward search for the code unit c from start (pinned) to the end.
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+                           int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return doLastIndexOf(c, start, remaining);
+}
+
+// Backward search for the code point c from start (pinned) to the end.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+                           int32_t start) const
+{
+    pinIndex(start);
+    const int32_t remaining = fLength - start;
+    return lastIndexOf(c, start, remaining);
+}
+
+// TRUE if the first text.fLength units of this string compare equal
+// to all of text.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{
+    const int32_t prefixLength = text.fLength;
+    return doCompare(0, prefixLength, text, 0, prefixLength) == 0;
+}
+
+// TRUE if the first srcLength units of this string compare equal to
+// the range (srcStart, srcLength) of srcText.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+                          int32_t srcStart,
+                          int32_t srcLength) const
+{
+    const int8_t order = doCompare(0, srcLength, srcText, srcStart, srcLength);
+    return order == 0;
+}
+
+// TRUE if this string begins with the srcLength units at srcChars.
+// A negative srcLength means that srcChars is NUL-terminated; as in
+// endsWith(), the real length is then determined with u_strlen() first,
+// because srcLength is also used as the length of the prefix of this
+// string that takes part in the comparison.
+// A NULL srcChars is passed through to doCompare() unchanged.
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars,
+                          int32_t srcLength) const
+{
+    if(srcLength < 0 && srcChars != 0) {
+        srcLength = u_strlen(srcChars);
+    }
+    return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
+}
+
+// TRUE if this string begins with the srcLength units that start at
+// srcChars + srcStart.
+// A negative srcLength means that the source is NUL-terminated; its
+// length is then measured from srcChars + srcStart, as in endsWith().
+// A NULL srcChars is passed through to doCompare() unchanged.
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars,
+                          int32_t srcStart,
+                          int32_t srcLength) const
+{
+    if(srcLength < 0 && srcChars != 0) {
+        srcLength = u_strlen(srcChars + srcStart);
+    }
+    return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
+}
+
+// TRUE if the last text.fLength units of this string compare equal
+// to all of text.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{
+    const int8_t order = doCompare(fLength - text.fLength, text.fLength,
+                                   text, 0, text.fLength);
+    return order == 0;
+}
+
+// TRUE if this string ends with the range (srcStart, srcLength) of
+// srcText. The range is pinned to srcText first because srcLength also
+// determines where the tail of this string begins.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+                        int32_t srcStart,
+                        int32_t srcLength) const
+{
+    srcText.pinIndices(srcStart, srcLength);
+    const int8_t order = doCompare(fLength - srcLength, srcLength,
+                                   srcText, srcStart, srcLength);
+    return order == 0;
+}
+
+// TRUE if this string ends with the srcLength units at srcChars.
+// A negative srcLength is replaced by u_strlen(srcChars).
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+                        int32_t srcLength) const
+{
+    if(srcLength < 0) {
+        srcLength = u_strlen(srcChars);
+    }
+    const int8_t order = doCompare(fLength - srcLength, srcLength,
+                                   srcChars, 0, srcLength);
+    return order == 0;
+}
+
+// TRUE if this string ends with the srcLength units that start at
+// srcChars + srcStart.
+// A negative srcLength is replaced by u_strlen(srcChars + srcStart).
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+                        int32_t srcStart,
+                        int32_t srcLength) const
+{
+    if(srcLength < 0) {
+        srcLength = u_strlen(srcChars + srcStart);
+    }
+    const int8_t order = doCompare(fLength - srcLength, srcLength,
+                                   srcChars, srcStart, srcLength);
+    return order == 0;
+}
+
+//========================================
+// replace
+//========================================
+// All replace() overloads fill in the missing source range arguments
+// and forward to doReplace(), whose result they return.
+
+// Replace the range (start, _length) with all of srcText.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText)
+{
+    return doReplace(start, _length, srcText, 0, srcText.fLength);
+}
+
+// Replace the range (start, _length) with a range of srcText.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart,
+                       int32_t srcLength)
+{
+    return doReplace(start, _length, srcText, srcStart, srcLength);
+}
+
+// Replace the range (start, _length) with srcLength units of srcChars.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UChar *srcChars,
+                       int32_t srcLength)
+{
+    return doReplace(start, _length, srcChars, 0, srcLength);
+}
+
+// Replace the range (start, _length) with a range of srcChars.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       const UChar *srcChars,
+                       int32_t srcStart,
+                       int32_t srcLength)
+{
+    return doReplace(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Replace the range (start, _length) with the single code unit srcChar,
+// which is passed on as a one-unit array.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       UChar srcChar)
+{
+    return doReplace(start, _length, &srcChar, 0, 1);
+}
+
+// Replace the range (start, _length) with the code point srcChar,
+// which U16_APPEND first writes into a local buffer.
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+                       int32_t _length,
+                       UChar32 srcChar)
+{
+    UChar units[U16_MAX_LENGTH];
+    int32_t unitCount = 0;
+    UBool appendFailed = FALSE;
+    U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, appendFailed);
+    // appendFailed is deliberately not examined; whatever unitCount
+    // U16_APPEND left behind is used as the replacement length
+    // NOTE(review): presumably unitCount stays 0 on failure - confirm in utf16.h
+    return doReplace(start, _length, units, 0, unitCount);
+}
+
+// Replace the range [start, limit) with all of srcText.
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText)
+{
+    return doReplace(start, limit - start, srcText, 0, srcText.fLength);
+}
+
+// Replace the range [start, limit) with the range [srcStart, srcLimit)
+// of srcText; both pairs are converted to (start, length) for doReplace().
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                              int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart,
+                              int32_t srcLimit)
+{
+    return doReplace(start, limit - start,
+                     srcText, srcStart, srcLimit - srcStart);
+}
+
+// Convenience overload: all of oldText, all of newText, all of this
+// string. Forwards to the findAndReplace() that takes explicit ranges.
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+                              const UnicodeString& newText)
+{
+    return findAndReplace(0, fLength,
+                          oldText, 0, oldText.fLength,
+                          newText, 0, newText.fLength);
+}
+
+// Convenience overload: all of oldText and all of newText, restricted
+// to the range (start, _length) of this string.
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+                              int32_t _length,
+                              const UnicodeString& oldText,
+                              const UnicodeString& newText)
+{
+    return findAndReplace(start, _length,
+                          oldText, 0, oldText.fLength,
+                          newText, 0, newText.fLength);
+}
+
+// ============================
+// extract
+// ============================
+// Extract into another UnicodeString: the entire current contents of
+// target are replaced with the range (start, _length) of this string.
+inline void
+UnicodeString::doExtract(int32_t start,
+                         int32_t _length,
+                         UnicodeString& target) const
+{
+    target.replace(0, target.fLength, *this, start, _length);
+}
+
+// Extract the range (start, _length) into the UChar array target,
+// beginning at targetStart; the work is done by doExtract().
+inline void
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       UChar *target,
+                       int32_t targetStart) const
+{
+    doExtract(start, _length, target, targetStart);
+}
+
+// Extract the range (start, _length) into the UnicodeString target;
+// the work is done by doExtract().
+inline void
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       UnicodeString& target) const
+{
+    doExtract(start, _length, target);
+}
+
+#if !UCONFIG_NO_CONVERSION
+
+// Codepage extract() without a destination size: forwards to the
+// overload with a size argument, passing 0xffffffff when dst is
+// non-NULL and 0 when it is NULL.
+inline int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t _length,
+                       char *dst,
+                       const char *codepage) const
+{
+    // This dstSize value will be checked explicitly
+    return extract(start,
+                   _length,
+                   dst,
+                   dst!=0 ? 0xffffffff : 0,
+                   codepage);
+}
+
+#endif
+
+// Extract the range [start, limit) into dst, beginning at dstStart.
+// Both indexes are pinned to this string before the length is computed.
+inline void
+UnicodeString::extractBetween(int32_t start,
+                              int32_t limit,
+                              UChar *dst,
+                              int32_t dstStart) const
+{
+    pinIndex(start);
+    pinIndex(limit);
+    const int32_t count = limit - start;
+    doExtract(start, count, dst, dstStart);
+}
+
+inline UChar
+UnicodeString::doCharAt(int32_t offset) const
+{
+  // A single unsigned comparison rejects negative and too-large offsets alike.
+  if((uint32_t)offset >= (uint32_t)fLength) {
+    return kInvalidUChar;
+  }
+  return fArray[offset];
+}
+
+inline UChar UnicodeString::charAt(int32_t offset) const {
+  return doCharAt(offset); // the range check lives in doCharAt()
+}
+
+inline UChar UnicodeString::operator[] (int32_t offset) const {
+  return doCharAt(offset); // same range-checked lookup as charAt()
+}
+
+inline UChar32
+UnicodeString::char32At(int32_t offset) const
+{
+  // Offsets outside [0, fLength), negative ones included, yield kInvalidUChar.
+  if((uint32_t)offset >= (uint32_t)fLength) {
+    return kInvalidUChar;
+  }
+  UChar32 cp;
+  U16_GET(fArray, 0, offset, fLength, cp);
+  return cp;
+}
+
+inline int32_t
+UnicodeString::getChar32Start(int32_t offset) const {
+  // Anything outside [0, fLength) maps to index 0.
+  if((uint32_t)offset >= (uint32_t)fLength) {
+    return 0;
+  }
+  U16_SET_CP_START(fArray, 0, offset); // may modify offset in place (see utf16.h)
+  return offset;
+}
+
+inline int32_t
+UnicodeString::getChar32Limit(int32_t offset) const {
+  // Anything outside [0, fLength) maps to the end of the string.
+  if((uint32_t)offset >= (uint32_t)fLength) {
+    return fLength;
+  }
+  U16_SET_CP_LIMIT(fArray, 0, offset, fLength); // may modify offset in place (see utf16.h)
+  return offset;
+}
+
+// TRUE exactly when the stored length is zero.
+inline UBool UnicodeString::isEmpty() const {
+  return 0 == fLength;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+inline const UChar *
+UnicodeString::getTerminatedBuffer() {
+ if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+ return 0;
+ } else if(fLengthunorm_normalize transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * unorm_normalize supports the standard normalization forms described in
+ *
+ * Unicode Standard Annex #15: Unicode Normalization Forms.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode. For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ * 00C1 LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ * 0041 LATIN CAPITAL LETTER A
+ * 0301 COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent". When you are searching or
+ * comparing text, you must ensure that these two sequences are treated
+ * equivalently. In addition, you must handle characters with more than one
+ * accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ * 0066 LATIN SMALL LETTER F
+ * 0066 LATIN SMALL LETTER F
+ * 0069 LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ * FB03 LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters. When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * unorm_normalize helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, unorm_normalize rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It makes it possible to work on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends exchanging texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+/**
+ * Constants for normalization modes (the "mode" argument of the unorm_ functions).
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** No decomposition/composition. @stable ICU 2.0 */
+ UNORM_NONE = 1,
+ /** Canonical decomposition. @stable ICU 2.0 */
+ UNORM_NFD = 2,
+ /** Compatibility decomposition. @stable ICU 2.0 */
+ UNORM_NFKD = 3,
+ /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFC = 4,
+ /** Default normalization; an alias for UNORM_NFC, not a separate mode. @stable ICU 2.0 */
+ UNORM_DEFAULT = UNORM_NFC,
+ /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFKC =5,
+ /** "Fast C or D" form. @stable ICU 2.0 */
+ UNORM_FCD = 6,
+
+ /** One more than the highest normalization mode constant (7, since UNORM_FCD is 6). @stable ICU 2.0 */
+ UNORM_MODE_COUNT
+} UNormalizationMode;
+
+/**
+ * Constants for option flags for normalization (bit values for an "options" argument).
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @stable ICU 2.6
+ */
+enum {
+ /**
+ * Option bit value (0x20) that selects Unicode 3.2 normalization
+ * (except NormalizationCorrections).
+ * At most one Unicode version can be selected at a time.
+ * @stable ICU 2.6
+ */
+ UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @stable ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according to the specified normalization mode
+ * and options.
+ * The source and result buffers must not be the same, nor overlap.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE,
+ * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_FCD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ * The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_normalize(const UChar *source, int32_t sourceLength,
+ UNormalizationMode mode, int32_t options,
+ UChar *result, int32_t resultLength,
+ UErrorCode *status);
+#endif
+/**
+ * Result values for unorm_quickCheck().
+ * For details see Unicode Technical Report 15.
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * Indicates that the string is not in the normalized format.
+ */
+ UNORM_NO,
+ /**
+ * Indicates that the string is in the normalized format.
+ */
+ UNORM_YES,
+ /**
+ * Indicates that it cannot be determined whether the string is in the
+ * normalized format without further, more thorough checks.
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Performs a quick check on a string to determine whether the string is
+ * in a particular normalization format.
+ * Three types of result can be returned: UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format; UNORM_NO indicates that the
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required;
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode which normalization form to test for
+ * @param status a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @stable ICU 2.0
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+ UNormalizationMode mode,
+ UErrorCode *status);
+
+/**
+ * Performs a quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a TRUE/FALSE result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeededToNormalize
+ * is set to TRUE if the normalization was necessary.
+ *
+ * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+ const UChar *right, int32_t rightLength,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and ustring.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uobject.h b/jni/EastAsianWidth/unicode/uobject.h
new file mode 100644
index 0000000..3d8b96e
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uobject.h
@@ -0,0 +1,308 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/** U_OVERRIDE_CXX_ALLOCATION - Define this to override operator new and
+ * delete in UMemory. Enabled by default for ICU.
+ *
+ * Enabling forces all allocation of ICU object types to use ICU's
+ * memory allocation. On Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library, meaning that
+ * the app and ICU will be using different heaps.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/** U_HAVE_PLACEMENT_NEW - Define this to define the placement new and
+ * delete in UMemory for STL.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+
+/** U_HAVE_DEBUG_LOCATION_NEW - Define this to define the MFC debug
+ * version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifndef U_HAVE_DEBUG_LOCATION_NEW
+#define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * Array form; see operator new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * Array form; see operator delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p);
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * Placement new: performs no allocation and simply returns ptr.
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) { return ptr; }
+
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * Matching placement delete: does nothing.
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new.
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line);
+ /**
+ * This method provides a matching delete for the MFC debug new.
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line);
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions such as those for ICU's "poor man's RTTI".
+ * It does not contain default implementations of virtual methods
+ * like getDynamicClassID to allow derived classes such as Format
+ * to declare these as pure virtual.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Destructor.
+ * Declared virtual so that subclasses can be destroyed through a UObject pointer.
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ * Pure virtual here, which is what makes UObject itself abstract.
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const = 0;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor
+ // commented out because UObject is abstract (see getDynamicClassID)
+ // inline UObject() {}
+
+ // copy constructor
+ // commented out because UObject is abstract (see getDynamicClassID)
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO: Sometime in the future, implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators (identity comparison; inside #if 0, so not compiled)
+ virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+ inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support co-variant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+
+// Future implementation for RTTI that supports subtyping. [alan]
+//
+// public:
+// /**
+// * @internal
+// */
+// static UClassID getStaticClassID();
+//
+// /**
+// * @internal
+// */
+// UBool instanceOf(UClassID type) const;
+};
+
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ * The class ID is the address of a function-local static char inside getStaticClassID().
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files. The corresponding
+ * header should declare getStaticClassID.
+ * Only getStaticClassID() is defined here; getDynamicClassID() is not.
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+// /**
+// * This macro adds ICU RTTI to an ICU concrete class implementation.
+// * This macro should be invoked in *.cpp files. The corresponding
+// * header should declare getDynamicClassID and getStaticClassID.
+// *
+// * @param myClass The name of the class that needs RTTI defined.
+// * @param myParent The name of the myClass's parent.
+// * @internal
+// */
+/*#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass, myParent) \
+ UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass, myParent) \
+ UClassID myClass::getDynamicClassID() const { \
+ return myClass::getStaticClassID(); \
+ }
+*/
+
+
+U_NAMESPACE_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/uobslete.h b/jni/EastAsianWidth/unicode/uobslete.h
new file mode 100644
index 0000000..4897533
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uobslete.h
@@ -0,0 +1,32 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uobslete.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+* Contains data for commenting out APIs.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#if !defined(UOBSLETE_H)
+#define UOBSLETE_H
+
+#if defined(U_HIDE_OBSOLETE_API)
+/* Both renaming branches below are empty: nothing is hidden here. */
+# if U_DISABLE_RENAMING
+# else
+# endif /* U_DISABLE_RENAMING */
+
+#endif /* defined(U_HIDE_OBSOLETE_API) */
+#endif /* !defined(UOBSLETE_H) */
+
diff --git a/jni/EastAsianWidth/unicode/urename.h b/jni/EastAsianWidth/unicode/urename.h
new file mode 100644
index 0000000..8a2b5c3
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/urename.h
@@ -0,0 +1,1730 @@
+/*
+*******************************************************************************
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: urename.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Perl script written by Vladimir Weinstein
+*
+* Contains data for renaming ICU exports.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* Uncomment the following line to disable renaming on platforms
+ that do not use Autoconf. */
+/* #define U_DISABLE_RENAMING 1 */
+
+#if !U_DISABLE_RENAMING
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString T_CString_int64ToString_3_8
+#define T_CString_integerToString T_CString_integerToString_3_8
+#define T_CString_stricmp T_CString_stricmp_3_8
+#define T_CString_stringToInteger T_CString_stringToInteger_3_8
+#define T_CString_strnicmp T_CString_strnicmp_3_8
+#define T_CString_toLowerCase T_CString_toLowerCase_3_8
+#define T_CString_toUpperCase T_CString_toUpperCase_3_8
+#define UCNV_FROM_U_CALLBACK_ESCAPE UCNV_FROM_U_CALLBACK_ESCAPE_3_8
+#define UCNV_FROM_U_CALLBACK_SKIP UCNV_FROM_U_CALLBACK_SKIP_3_8
+#define UCNV_FROM_U_CALLBACK_STOP UCNV_FROM_U_CALLBACK_STOP_3_8
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE UCNV_FROM_U_CALLBACK_SUBSTITUTE_3_8
+#define UCNV_TO_U_CALLBACK_ESCAPE UCNV_TO_U_CALLBACK_ESCAPE_3_8
+#define UCNV_TO_U_CALLBACK_SKIP UCNV_TO_U_CALLBACK_SKIP_3_8
+#define UCNV_TO_U_CALLBACK_STOP UCNV_TO_U_CALLBACK_STOP_3_8
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE UCNV_TO_U_CALLBACK_SUBSTITUTE_3_8
+#define UDataMemory_createNewInstance UDataMemory_createNewInstance_3_8
+#define UDataMemory_init UDataMemory_init_3_8
+#define UDataMemory_isLoaded UDataMemory_isLoaded_3_8
+#define UDataMemory_normalizeDataPointer UDataMemory_normalizeDataPointer_3_8
+#define UDataMemory_setData UDataMemory_setData_3_8
+#define UDatamemory_assign UDatamemory_assign_3_8
+#define _ASCIIData _ASCIIData_3_8
+#define _Bocu1Data _Bocu1Data_3_8
+#define _CESU8Data _CESU8Data_3_8
+#define _HZData _HZData_3_8
+#define _IMAPData _IMAPData_3_8
+#define _ISCIIData _ISCIIData_3_8
+#define _ISO2022Data _ISO2022Data_3_8
+#define _LMBCSData1 _LMBCSData1_3_8
+#define _LMBCSData11 _LMBCSData11_3_8
+#define _LMBCSData16 _LMBCSData16_3_8
+#define _LMBCSData17 _LMBCSData17_3_8
+#define _LMBCSData18 _LMBCSData18_3_8
+#define _LMBCSData19 _LMBCSData19_3_8
+#define _LMBCSData2 _LMBCSData2_3_8
+#define _LMBCSData3 _LMBCSData3_3_8
+#define _LMBCSData4 _LMBCSData4_3_8
+#define _LMBCSData5 _LMBCSData5_3_8
+#define _LMBCSData6 _LMBCSData6_3_8
+#define _LMBCSData8 _LMBCSData8_3_8
+#define _Latin1Data _Latin1Data_3_8
+#define _MBCSData _MBCSData_3_8
+#define _SCSUData _SCSUData_3_8
+#define _UTF16BEData _UTF16BEData_3_8
+#define _UTF16Data _UTF16Data_3_8
+#define _UTF16LEData _UTF16LEData_3_8
+#define _UTF32BEData _UTF32BEData_3_8
+#define _UTF32Data _UTF32Data_3_8
+#define _UTF32LEData _UTF32LEData_3_8
+#define _UTF7Data _UTF7Data_3_8
+#define _UTF8Data _UTF8Data_3_8
+#define cmemory_cleanup cmemory_cleanup_3_8
+#define cmemory_inUse cmemory_inUse_3_8
+#define le_close le_close_3_8
+#define le_create le_create_3_8
+#define le_getCharIndices le_getCharIndices_3_8
+#define le_getCharIndicesWithBase le_getCharIndicesWithBase_3_8
+#define le_getGlyphCount le_getGlyphCount_3_8
+#define le_getGlyphPosition le_getGlyphPosition_3_8
+#define le_getGlyphPositions le_getGlyphPositions_3_8
+#define le_getGlyphs le_getGlyphs_3_8
+#define le_layoutChars le_layoutChars_3_8
+#define le_reset le_reset_3_8
+#define locale_getKeywords locale_getKeywords_3_8
+#define locale_get_default locale_get_default_3_8
+#define locale_set_default locale_set_default_3_8
+#define pl_addFontRun pl_addFontRun_3_8
+#define pl_addLocaleRun pl_addLocaleRun_3_8
+#define pl_addValueRun pl_addValueRun_3_8
+#define pl_close pl_close_3_8
+#define pl_closeFontRuns pl_closeFontRuns_3_8
+#define pl_closeLine pl_closeLine_3_8
+#define pl_closeLocaleRuns pl_closeLocaleRuns_3_8
+#define pl_closeValueRuns pl_closeValueRuns_3_8
+#define pl_countLineRuns pl_countLineRuns_3_8
+#define pl_create pl_create_3_8
+#define pl_getAscent pl_getAscent_3_8
+#define pl_getDescent pl_getDescent_3_8
+#define pl_getFontRunCount pl_getFontRunCount_3_8
+#define pl_getFontRunFont pl_getFontRunFont_3_8
+#define pl_getFontRunLastLimit pl_getFontRunLastLimit_3_8
+#define pl_getFontRunLimit pl_getFontRunLimit_3_8
+#define pl_getLeading pl_getLeading_3_8
+#define pl_getLineAscent pl_getLineAscent_3_8
+#define pl_getLineDescent pl_getLineDescent_3_8
+#define pl_getLineLeading pl_getLineLeading_3_8
+#define pl_getLineVisualRun pl_getLineVisualRun_3_8
+#define pl_getLineWidth pl_getLineWidth_3_8
+#define pl_getLocaleRunCount pl_getLocaleRunCount_3_8
+#define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_3_8
+#define pl_getLocaleRunLimit pl_getLocaleRunLimit_3_8
+#define pl_getLocaleRunLocale pl_getLocaleRunLocale_3_8
+#define pl_getParagraphLevel pl_getParagraphLevel_3_8
+#define pl_getTextDirection pl_getTextDirection_3_8
+#define pl_getValueRunCount pl_getValueRunCount_3_8
+#define pl_getValueRunLastLimit pl_getValueRunLastLimit_3_8
+#define pl_getValueRunLimit pl_getValueRunLimit_3_8
+#define pl_getValueRunValue pl_getValueRunValue_3_8
+#define pl_getVisualRunAscent pl_getVisualRunAscent_3_8
+#define pl_getVisualRunDescent pl_getVisualRunDescent_3_8
+#define pl_getVisualRunDirection pl_getVisualRunDirection_3_8
+#define pl_getVisualRunFont pl_getVisualRunFont_3_8
+#define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_3_8
+#define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_3_8
+#define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_3_8
+#define pl_getVisualRunLeading pl_getVisualRunLeading_3_8
+#define pl_getVisualRunPositions pl_getVisualRunPositions_3_8
+#define pl_isComplex pl_isComplex_3_8
+#define pl_nextLine pl_nextLine_3_8
+#define pl_openEmptyFontRuns pl_openEmptyFontRuns_3_8
+#define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_3_8
+#define pl_openEmptyValueRuns pl_openEmptyValueRuns_3_8
+#define pl_openFontRuns pl_openFontRuns_3_8
+#define pl_openLocaleRuns pl_openLocaleRuns_3_8
+#define pl_openValueRuns pl_openValueRuns_3_8
+#define pl_reflow pl_reflow_3_8
+#define pl_resetFontRuns pl_resetFontRuns_3_8
+#define pl_resetLocaleRuns pl_resetLocaleRuns_3_8
+#define pl_resetValueRuns pl_resetValueRuns_3_8
+#define res_countArrayItems res_countArrayItems_3_8
+#define res_findResource res_findResource_3_8
+#define res_getAlias res_getAlias_3_8
+#define res_getArrayItem res_getArrayItem_3_8
+#define res_getBinary res_getBinary_3_8
+#define res_getIntVector res_getIntVector_3_8
+#define res_getResource res_getResource_3_8
+#define res_getString res_getString_3_8
+#define res_getTableItemByIndex res_getTableItemByIndex_3_8
+#define res_getTableItemByKey res_getTableItemByKey_3_8
+#define res_load res_load_3_8
+#define res_unload res_unload_3_8
+#define transliterator_cleanup transliterator_cleanup_3_8
+#define triedict_swap triedict_swap_3_8
+#define u_UCharsToChars u_UCharsToChars_3_8
+#define u_austrcpy u_austrcpy_3_8
+#define u_austrncpy u_austrncpy_3_8
+#define u_catclose u_catclose_3_8
+#define u_catgets u_catgets_3_8
+#define u_catopen u_catopen_3_8
+#define u_charAge u_charAge_3_8
+#define u_charDigitValue u_charDigitValue_3_8
+#define u_charDirection u_charDirection_3_8
+#define u_charFromName u_charFromName_3_8
+#define u_charMirror u_charMirror_3_8
+#define u_charName u_charName_3_8
+#define u_charType u_charType_3_8
+#define u_charsToUChars u_charsToUChars_3_8
+#define u_cleanup u_cleanup_3_8
+#define u_countChar32 u_countChar32_3_8
+#define u_digit u_digit_3_8
+#define u_enumCharNames u_enumCharNames_3_8
+#define u_enumCharTypes u_enumCharTypes_3_8
+#define u_errorName u_errorName_3_8
+#define u_fclose u_fclose_3_8
+#define u_feof u_feof_3_8
+#define u_fflush u_fflush_3_8
+#define u_fgetConverter u_fgetConverter_3_8
+#define u_fgetc u_fgetc_3_8
+#define u_fgetcodepage u_fgetcodepage_3_8
+#define u_fgetcx u_fgetcx_3_8
+#define u_fgetfile u_fgetfile_3_8
+#define u_fgetlocale u_fgetlocale_3_8
+#define u_fgets u_fgets_3_8
+#define u_file_read u_file_read_3_8
+#define u_file_write u_file_write_3_8
+#define u_file_write_flush u_file_write_flush_3_8
+#define u_finit u_finit_3_8
+#define u_foldCase u_foldCase_3_8
+#define u_fopen u_fopen_3_8
+#define u_forDigit u_forDigit_3_8
+#define u_formatMessage u_formatMessage_3_8
+#define u_formatMessageWithError u_formatMessageWithError_3_8
+#define u_fprintf u_fprintf_3_8
+#define u_fprintf_u u_fprintf_u_3_8
+#define u_fputc u_fputc_3_8
+#define u_fputs u_fputs_3_8
+#define u_frewind u_frewind_3_8
+#define u_fscanf u_fscanf_3_8
+#define u_fscanf_u u_fscanf_u_3_8
+#define u_fsetcodepage u_fsetcodepage_3_8
+#define u_fsetlocale u_fsetlocale_3_8
+#define u_fsettransliterator u_fsettransliterator_3_8
+#define u_fstropen u_fstropen_3_8
+#define u_fungetc u_fungetc_3_8
+#define u_getCombiningClass u_getCombiningClass_3_8
+#define u_getDataDirectory u_getDataDirectory_3_8
+#define u_getDefaultConverter u_getDefaultConverter_3_8
+#define u_getFC_NFKC_Closure u_getFC_NFKC_Closure_3_8
+#define u_getISOComment u_getISOComment_3_8
+#define u_getIntPropertyMaxValue u_getIntPropertyMaxValue_3_8
+#define u_getIntPropertyMinValue u_getIntPropertyMinValue_3_8
+#define u_getIntPropertyValue u_getIntPropertyValue_3_8
+#define u_getNumericValue u_getNumericValue_3_8
+#define u_getPropertyEnum u_getPropertyEnum_3_8
+#define u_getPropertyName u_getPropertyName_3_8
+#define u_getPropertyValueEnum u_getPropertyValueEnum_3_8
+#define u_getPropertyValueName u_getPropertyValueName_3_8
+#define u_getUnicodeProperties u_getUnicodeProperties_3_8
+#define u_getUnicodeVersion u_getUnicodeVersion_3_8
+#define u_getVersion u_getVersion_3_8
+#define u_growBufferFromStatic u_growBufferFromStatic_3_8
+#define u_hasBinaryProperty u_hasBinaryProperty_3_8
+#define u_init u_init_3_8
+#define u_isIDIgnorable u_isIDIgnorable_3_8
+#define u_isIDPart u_isIDPart_3_8
+#define u_isIDStart u_isIDStart_3_8
+#define u_isISOControl u_isISOControl_3_8
+#define u_isJavaIDPart u_isJavaIDPart_3_8
+#define u_isJavaIDStart u_isJavaIDStart_3_8
+#define u_isJavaSpaceChar u_isJavaSpaceChar_3_8
+#define u_isMirrored u_isMirrored_3_8
+#define u_isUAlphabetic u_isUAlphabetic_3_8
+#define u_isULowercase u_isULowercase_3_8
+#define u_isUUppercase u_isUUppercase_3_8
+#define u_isUWhiteSpace u_isUWhiteSpace_3_8
+#define u_isWhitespace u_isWhitespace_3_8
+#define u_isalnum u_isalnum_3_8
+#define u_isalnumPOSIX u_isalnumPOSIX_3_8
+#define u_isalpha u_isalpha_3_8
+#define u_isbase u_isbase_3_8
+#define u_isblank u_isblank_3_8
+#define u_iscntrl u_iscntrl_3_8
+#define u_isdefined u_isdefined_3_8
+#define u_isdigit u_isdigit_3_8
+#define u_isgraph u_isgraph_3_8
+#define u_isgraphPOSIX u_isgraphPOSIX_3_8
+#define u_islower u_islower_3_8
+#define u_isprint u_isprint_3_8
+#define u_isprintPOSIX u_isprintPOSIX_3_8
+#define u_ispunct u_ispunct_3_8
+#define u_isspace u_isspace_3_8
+#define u_istitle u_istitle_3_8
+#define u_isupper u_isupper_3_8
+#define u_isxdigit u_isxdigit_3_8
+#define u_lengthOfIdenticalLevelRun u_lengthOfIdenticalLevelRun_3_8
+#define u_locbund_close u_locbund_close_3_8
+#define u_locbund_getNumberFormat u_locbund_getNumberFormat_3_8
+#define u_locbund_init u_locbund_init_3_8
+#define u_memcasecmp u_memcasecmp_3_8
+#define u_memchr u_memchr_3_8
+#define u_memchr32 u_memchr32_3_8
+#define u_memcmp u_memcmp_3_8
+#define u_memcmpCodePointOrder u_memcmpCodePointOrder_3_8
+#define u_memcpy u_memcpy_3_8
+#define u_memmove u_memmove_3_8
+#define u_memrchr u_memrchr_3_8
+#define u_memrchr32 u_memrchr32_3_8
+#define u_memset u_memset_3_8
+#define u_parseMessage u_parseMessage_3_8
+#define u_parseMessageWithError u_parseMessageWithError_3_8
+#define u_printf_parse u_printf_parse_3_8
+#define u_releaseDefaultConverter u_releaseDefaultConverter_3_8
+#define u_scanf_parse u_scanf_parse_3_8
+#define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_3_8
+#define u_setDataDirectory u_setDataDirectory_3_8
+#define u_setMemoryFunctions u_setMemoryFunctions_3_8
+#define u_setMutexFunctions u_setMutexFunctions_3_8
+#define u_shapeArabic u_shapeArabic_3_8
+#define u_snprintf u_snprintf_3_8
+#define u_snprintf_u u_snprintf_u_3_8
+#define u_sprintf u_sprintf_3_8
+#define u_sprintf_u u_sprintf_u_3_8
+#define u_sscanf u_sscanf_3_8
+#define u_sscanf_u u_sscanf_u_3_8
+#define u_strCaseCompare u_strCaseCompare_3_8
+#define u_strCompare u_strCompare_3_8
+#define u_strCompareIter u_strCompareIter_3_8
+#define u_strFindFirst u_strFindFirst_3_8
+#define u_strFindLast u_strFindLast_3_8
+#define u_strFoldCase u_strFoldCase_3_8
+#define u_strFromPunycode u_strFromPunycode_3_8
+#define u_strFromUTF32 u_strFromUTF32_3_8
+#define u_strFromUTF8 u_strFromUTF8_3_8
+#define u_strFromUTF8Lenient u_strFromUTF8Lenient_3_8
+#define u_strFromUTF8WithSub u_strFromUTF8WithSub_3_8
+#define u_strFromWCS u_strFromWCS_3_8
+#define u_strHasMoreChar32Than u_strHasMoreChar32Than_3_8
+#define u_strToLower u_strToLower_3_8
+#define u_strToPunycode u_strToPunycode_3_8
+#define u_strToTitle u_strToTitle_3_8
+#define u_strToUTF32 u_strToUTF32_3_8
+#define u_strToUTF8 u_strToUTF8_3_8
+#define u_strToUTF8WithSub u_strToUTF8WithSub_3_8
+#define u_strToUpper u_strToUpper_3_8
+#define u_strToWCS u_strToWCS_3_8
+#define u_strcasecmp u_strcasecmp_3_8
+#define u_strcat u_strcat_3_8
+#define u_strchr u_strchr_3_8
+#define u_strchr32 u_strchr32_3_8
+#define u_strcmp u_strcmp_3_8
+#define u_strcmpCodePointOrder u_strcmpCodePointOrder_3_8
+#define u_strcmpFold u_strcmpFold_3_8
+#define u_strcpy u_strcpy_3_8
+#define u_strcspn u_strcspn_3_8
+#define u_strlen u_strlen_3_8
+#define u_strncasecmp u_strncasecmp_3_8
+#define u_strncat u_strncat_3_8
+#define u_strncmp u_strncmp_3_8
+#define u_strncmpCodePointOrder u_strncmpCodePointOrder_3_8
+#define u_strncpy u_strncpy_3_8
+#define u_strpbrk u_strpbrk_3_8
+#define u_strrchr u_strrchr_3_8
+#define u_strrchr32 u_strrchr32_3_8
+#define u_strrstr u_strrstr_3_8
+#define u_strspn u_strspn_3_8
+#define u_strstr u_strstr_3_8
+#define u_strtok_r u_strtok_r_3_8
+#define u_terminateChars u_terminateChars_3_8
+#define u_terminateUChar32s u_terminateUChar32s_3_8
+#define u_terminateUChars u_terminateUChars_3_8
+#define u_terminateWChars u_terminateWChars_3_8
+#define u_tolower u_tolower_3_8
+#define u_totitle u_totitle_3_8
+#define u_toupper u_toupper_3_8
+#define u_uastrcpy u_uastrcpy_3_8
+#define u_uastrncpy u_uastrncpy_3_8
+#define u_unescape u_unescape_3_8
+#define u_unescapeAt u_unescapeAt_3_8
+#define u_versionFromString u_versionFromString_3_8
+#define u_versionToString u_versionToString_3_8
+#define u_vformatMessage u_vformatMessage_3_8
+#define u_vformatMessageWithError u_vformatMessageWithError_3_8
+#define u_vfprintf u_vfprintf_3_8
+#define u_vfprintf_u u_vfprintf_u_3_8
+#define u_vfscanf u_vfscanf_3_8
+#define u_vfscanf_u u_vfscanf_u_3_8
+#define u_vparseMessage u_vparseMessage_3_8
+#define u_vparseMessageWithError u_vparseMessageWithError_3_8
+#define u_vsnprintf u_vsnprintf_3_8
+#define u_vsnprintf_u u_vsnprintf_u_3_8
+#define u_vsprintf u_vsprintf_3_8
+#define u_vsprintf_u u_vsprintf_u_3_8
+#define u_vsscanf u_vsscanf_3_8
+#define u_vsscanf_u u_vsscanf_u_3_8
+#define u_writeDiff u_writeDiff_3_8
+#define u_writeIdenticalLevelRun u_writeIdenticalLevelRun_3_8
+#define u_writeIdenticalLevelRunTwoChars u_writeIdenticalLevelRunTwoChars_3_8
+#define ubidi_addPropertyStarts ubidi_addPropertyStarts_3_8
+#define ubidi_close ubidi_close_3_8
+#define ubidi_closeProps ubidi_closeProps_3_8
+#define ubidi_countParagraphs ubidi_countParagraphs_3_8
+#define ubidi_countRuns ubidi_countRuns_3_8
+#define ubidi_getClass ubidi_getClass_3_8
+#define ubidi_getClassCallback ubidi_getClassCallback_3_8
+#define ubidi_getCustomizedClass ubidi_getCustomizedClass_3_8
+#define ubidi_getDirection ubidi_getDirection_3_8
+#define ubidi_getDummy ubidi_getDummy_3_8
+#define ubidi_getJoiningGroup ubidi_getJoiningGroup_3_8
+#define ubidi_getJoiningType ubidi_getJoiningType_3_8
+#define ubidi_getLength ubidi_getLength_3_8
+#define ubidi_getLevelAt ubidi_getLevelAt_3_8
+#define ubidi_getLevels ubidi_getLevels_3_8
+#define ubidi_getLogicalIndex ubidi_getLogicalIndex_3_8
+#define ubidi_getLogicalMap ubidi_getLogicalMap_3_8
+#define ubidi_getLogicalRun ubidi_getLogicalRun_3_8
+#define ubidi_getMaxValue ubidi_getMaxValue_3_8
+#define ubidi_getMemory ubidi_getMemory_3_8
+#define ubidi_getMirror ubidi_getMirror_3_8
+#define ubidi_getParaLevel ubidi_getParaLevel_3_8
+#define ubidi_getParagraph ubidi_getParagraph_3_8
+#define ubidi_getParagraphByIndex ubidi_getParagraphByIndex_3_8
+#define ubidi_getProcessedLength ubidi_getProcessedLength_3_8
+#define ubidi_getReorderingMode ubidi_getReorderingMode_3_8
+#define ubidi_getReorderingOptions ubidi_getReorderingOptions_3_8
+#define ubidi_getResultLength ubidi_getResultLength_3_8
+#define ubidi_getRuns ubidi_getRuns_3_8
+#define ubidi_getSingleton ubidi_getSingleton_3_8
+#define ubidi_getText ubidi_getText_3_8
+#define ubidi_getVisualIndex ubidi_getVisualIndex_3_8
+#define ubidi_getVisualMap ubidi_getVisualMap_3_8
+#define ubidi_getVisualRun ubidi_getVisualRun_3_8
+#define ubidi_invertMap ubidi_invertMap_3_8
+#define ubidi_isBidiControl ubidi_isBidiControl_3_8
+#define ubidi_isInverse ubidi_isInverse_3_8
+#define ubidi_isJoinControl ubidi_isJoinControl_3_8
+#define ubidi_isMirrored ubidi_isMirrored_3_8
+#define ubidi_isOrderParagraphsLTR ubidi_isOrderParagraphsLTR_3_8
+#define ubidi_open ubidi_open_3_8
+#define ubidi_openSized ubidi_openSized_3_8
+#define ubidi_orderParagraphsLTR ubidi_orderParagraphsLTR_3_8
+#define ubidi_reorderLogical ubidi_reorderLogical_3_8
+#define ubidi_reorderVisual ubidi_reorderVisual_3_8
+#define ubidi_setClassCallback ubidi_setClassCallback_3_8
+#define ubidi_setInverse ubidi_setInverse_3_8
+#define ubidi_setLine ubidi_setLine_3_8
+#define ubidi_setPara ubidi_setPara_3_8
+#define ubidi_setReorderingMode ubidi_setReorderingMode_3_8
+#define ubidi_setReorderingOptions ubidi_setReorderingOptions_3_8
+#define ubidi_writeReordered ubidi_writeReordered_3_8
+#define ubidi_writeReverse ubidi_writeReverse_3_8
+#define ublock_getCode ublock_getCode_3_8
+#define ubrk_close ubrk_close_3_8
+#define ubrk_countAvailable ubrk_countAvailable_3_8
+#define ubrk_current ubrk_current_3_8
+#define ubrk_first ubrk_first_3_8
+#define ubrk_following ubrk_following_3_8
+#define ubrk_getAvailable ubrk_getAvailable_3_8
+#define ubrk_getLocaleByType ubrk_getLocaleByType_3_8
+#define ubrk_getRuleStatus ubrk_getRuleStatus_3_8
+#define ubrk_getRuleStatusVec ubrk_getRuleStatusVec_3_8
+#define ubrk_isBoundary ubrk_isBoundary_3_8
+#define ubrk_last ubrk_last_3_8
+#define ubrk_next ubrk_next_3_8
+#define ubrk_open ubrk_open_3_8
+#define ubrk_openRules ubrk_openRules_3_8
+#define ubrk_preceding ubrk_preceding_3_8
+#define ubrk_previous ubrk_previous_3_8
+#define ubrk_safeClone ubrk_safeClone_3_8
+#define ubrk_setText ubrk_setText_3_8
+#define ubrk_setUText ubrk_setUText_3_8
+#define ubrk_swap ubrk_swap_3_8
+#define ucal_add ucal_add_3_8
+#define ucal_clear ucal_clear_3_8
+#define ucal_clearField ucal_clearField_3_8
+#define ucal_close ucal_close_3_8
+#define ucal_countAvailable ucal_countAvailable_3_8
+#define ucal_equivalentTo ucal_equivalentTo_3_8
+#define ucal_get ucal_get_3_8
+#define ucal_getAttribute ucal_getAttribute_3_8
+#define ucal_getAvailable ucal_getAvailable_3_8
+#define ucal_getDSTSavings ucal_getDSTSavings_3_8
+#define ucal_getDefaultTimeZone ucal_getDefaultTimeZone_3_8
+#define ucal_getGregorianChange ucal_getGregorianChange_3_8
+#define ucal_getLimit ucal_getLimit_3_8
+#define ucal_getLocaleByType ucal_getLocaleByType_3_8
+#define ucal_getMillis ucal_getMillis_3_8
+#define ucal_getNow ucal_getNow_3_8
+#define ucal_getTZDataVersion ucal_getTZDataVersion_3_8
+#define ucal_getTimeZoneDisplayName ucal_getTimeZoneDisplayName_3_8
+#define ucal_inDaylightTime ucal_inDaylightTime_3_8
+#define ucal_isSet ucal_isSet_3_8
+#define ucal_open ucal_open_3_8
+#define ucal_openCountryTimeZones ucal_openCountryTimeZones_3_8
+#define ucal_openTimeZones ucal_openTimeZones_3_8
+#define ucal_roll ucal_roll_3_8
+#define ucal_set ucal_set_3_8
+#define ucal_setAttribute ucal_setAttribute_3_8
+#define ucal_setDate ucal_setDate_3_8
+#define ucal_setDateTime ucal_setDateTime_3_8
+#define ucal_setDefaultTimeZone ucal_setDefaultTimeZone_3_8
+#define ucal_setGregorianChange ucal_setGregorianChange_3_8
+#define ucal_setMillis ucal_setMillis_3_8
+#define ucal_setTimeZone ucal_setTimeZone_3_8
+#define ucase_addCaseClosure ucase_addCaseClosure_3_8
+#define ucase_addPropertyStarts ucase_addPropertyStarts_3_8
+#define ucase_addStringCaseClosure ucase_addStringCaseClosure_3_8
+#define ucase_close ucase_close_3_8
+#define ucase_fold ucase_fold_3_8
+#define ucase_getCaseLocale ucase_getCaseLocale_3_8
+#define ucase_getDummy ucase_getDummy_3_8
+#define ucase_getSingleton ucase_getSingleton_3_8
+#define ucase_getType ucase_getType_3_8
+#define ucase_getTypeOrIgnorable ucase_getTypeOrIgnorable_3_8
+#define ucase_hasBinaryProperty ucase_hasBinaryProperty_3_8
+#define ucase_isCaseSensitive ucase_isCaseSensitive_3_8
+#define ucase_isSoftDotted ucase_isSoftDotted_3_8
+#define ucase_toFullFolding ucase_toFullFolding_3_8
+#define ucase_toFullLower ucase_toFullLower_3_8
+#define ucase_toFullTitle ucase_toFullTitle_3_8
+#define ucase_toFullUpper ucase_toFullUpper_3_8
+#define ucase_tolower ucase_tolower_3_8
+#define ucase_totitle ucase_totitle_3_8
+#define ucase_toupper ucase_toupper_3_8
+#define ucasemap_close ucasemap_close_3_8
+#define ucasemap_getBreakIterator ucasemap_getBreakIterator_3_8
+#define ucasemap_getLocale ucasemap_getLocale_3_8
+#define ucasemap_getOptions ucasemap_getOptions_3_8
+#define ucasemap_open ucasemap_open_3_8
+#define ucasemap_setBreakIterator ucasemap_setBreakIterator_3_8
+#define ucasemap_setLocale ucasemap_setLocale_3_8
+#define ucasemap_setOptions ucasemap_setOptions_3_8
+#define ucasemap_toTitle ucasemap_toTitle_3_8
+#define ucasemap_utf8FoldCase ucasemap_utf8FoldCase_3_8
+#define ucasemap_utf8ToLower ucasemap_utf8ToLower_3_8
+#define ucasemap_utf8ToTitle ucasemap_utf8ToTitle_3_8
+#define ucasemap_utf8ToUpper ucasemap_utf8ToUpper_3_8
+#define uchar_addPropertyStarts uchar_addPropertyStarts_3_8
+#define uchar_getHST uchar_getHST_3_8
+#define uchar_swapNames uchar_swapNames_3_8
+#define ucln_common_registerCleanup ucln_common_registerCleanup_3_8
+#define ucln_i18n_registerCleanup ucln_i18n_registerCleanup_3_8
+#define ucln_io_registerCleanup ucln_io_registerCleanup_3_8
+#define ucln_lib_cleanup ucln_lib_cleanup_3_8
+#define ucln_registerCleanup ucln_registerCleanup_3_8
+#define ucnv_MBCSFromUChar32 ucnv_MBCSFromUChar32_3_8
+#define ucnv_MBCSFromUnicodeWithOffsets ucnv_MBCSFromUnicodeWithOffsets_3_8
+#define ucnv_MBCSGetType ucnv_MBCSGetType_3_8
+#define ucnv_MBCSGetUnicodeSetForBytes ucnv_MBCSGetUnicodeSetForBytes_3_8
+#define ucnv_MBCSGetUnicodeSetForUnicode ucnv_MBCSGetUnicodeSetForUnicode_3_8
+#define ucnv_MBCSIsLeadByte ucnv_MBCSIsLeadByte_3_8
+#define ucnv_MBCSSimpleGetNextUChar ucnv_MBCSSimpleGetNextUChar_3_8
+#define ucnv_MBCSToUnicodeWithOffsets ucnv_MBCSToUnicodeWithOffsets_3_8
+#define ucnv_bld_countAvailableConverters ucnv_bld_countAvailableConverters_3_8
+#define ucnv_bld_getAvailableConverter ucnv_bld_getAvailableConverter_3_8
+#define ucnv_cbFromUWriteBytes ucnv_cbFromUWriteBytes_3_8
+#define ucnv_cbFromUWriteSub ucnv_cbFromUWriteSub_3_8
+#define ucnv_cbFromUWriteUChars ucnv_cbFromUWriteUChars_3_8
+#define ucnv_cbToUWriteSub ucnv_cbToUWriteSub_3_8
+#define ucnv_cbToUWriteUChars ucnv_cbToUWriteUChars_3_8
+#define ucnv_close ucnv_close_3_8
+#define ucnv_compareNames ucnv_compareNames_3_8
+#define ucnv_convert ucnv_convert_3_8
+#define ucnv_convertEx ucnv_convertEx_3_8
+#define ucnv_countAliases ucnv_countAliases_3_8
+#define ucnv_countAvailable ucnv_countAvailable_3_8
+#define ucnv_countStandards ucnv_countStandards_3_8
+#define ucnv_createAlgorithmicConverter ucnv_createAlgorithmicConverter_3_8
+#define ucnv_createConverter ucnv_createConverter_3_8
+#define ucnv_createConverterFromPackage ucnv_createConverterFromPackage_3_8
+#define ucnv_createConverterFromSharedData ucnv_createConverterFromSharedData_3_8
+#define ucnv_detectUnicodeSignature ucnv_detectUnicodeSignature_3_8
+#define ucnv_extContinueMatchFromU ucnv_extContinueMatchFromU_3_8
+#define ucnv_extContinueMatchToU ucnv_extContinueMatchToU_3_8
+#define ucnv_extGetUnicodeSet ucnv_extGetUnicodeSet_3_8
+#define ucnv_extInitialMatchFromU ucnv_extInitialMatchFromU_3_8
+#define ucnv_extInitialMatchToU ucnv_extInitialMatchToU_3_8
+#define ucnv_extSimpleMatchFromU ucnv_extSimpleMatchFromU_3_8
+#define ucnv_extSimpleMatchToU ucnv_extSimpleMatchToU_3_8
+#define ucnv_fixFileSeparator ucnv_fixFileSeparator_3_8
+#define ucnv_flushCache ucnv_flushCache_3_8
+#define ucnv_fromAlgorithmic ucnv_fromAlgorithmic_3_8
+#define ucnv_fromUChars ucnv_fromUChars_3_8
+#define ucnv_fromUCountPending ucnv_fromUCountPending_3_8
+#define ucnv_fromUWriteBytes ucnv_fromUWriteBytes_3_8
+#define ucnv_fromUnicode ucnv_fromUnicode_3_8
+#define ucnv_fromUnicode_UTF8 ucnv_fromUnicode_UTF8_3_8
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC ucnv_fromUnicode_UTF8_OFFSETS_LOGIC_3_8
+#define ucnv_getAlias ucnv_getAlias_3_8
+#define ucnv_getAliases ucnv_getAliases_3_8
+#define ucnv_getAvailableName ucnv_getAvailableName_3_8
+#define ucnv_getCCSID ucnv_getCCSID_3_8
+#define ucnv_getCanonicalName ucnv_getCanonicalName_3_8
+#define ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet_3_8
+#define ucnv_getDefaultName ucnv_getDefaultName_3_8
+#define ucnv_getDisplayName ucnv_getDisplayName_3_8
+#define ucnv_getFromUCallBack ucnv_getFromUCallBack_3_8
+#define ucnv_getInvalidChars ucnv_getInvalidChars_3_8
+#define ucnv_getInvalidUChars ucnv_getInvalidUChars_3_8
+#define ucnv_getMaxCharSize ucnv_getMaxCharSize_3_8
+#define ucnv_getMinCharSize ucnv_getMinCharSize_3_8
+#define ucnv_getName ucnv_getName_3_8
+#define ucnv_getNextUChar ucnv_getNextUChar_3_8
+#define ucnv_getNonSurrogateUnicodeSet ucnv_getNonSurrogateUnicodeSet_3_8
+#define ucnv_getPlatform ucnv_getPlatform_3_8
+#define ucnv_getStandard ucnv_getStandard_3_8
+#define ucnv_getStandardName ucnv_getStandardName_3_8
+#define ucnv_getStarters ucnv_getStarters_3_8
+#define ucnv_getSubstChars ucnv_getSubstChars_3_8
+#define ucnv_getToUCallBack ucnv_getToUCallBack_3_8
+#define ucnv_getType ucnv_getType_3_8
+#define ucnv_getUnicodeSet ucnv_getUnicodeSet_3_8
+#define ucnv_incrementRefCount ucnv_incrementRefCount_3_8
+#define ucnv_io_countKnownConverters ucnv_io_countKnownConverters_3_8
+#define ucnv_io_getConverterName ucnv_io_getConverterName_3_8
+#define ucnv_io_stripASCIIForCompare ucnv_io_stripASCIIForCompare_3_8
+#define ucnv_io_stripEBCDICForCompare ucnv_io_stripEBCDICForCompare_3_8
+#define ucnv_isAmbiguous ucnv_isAmbiguous_3_8
+#define ucnv_load ucnv_load_3_8
+#define ucnv_loadSharedData ucnv_loadSharedData_3_8
+#define ucnv_open ucnv_open_3_8
+#define ucnv_openAllNames ucnv_openAllNames_3_8
+#define ucnv_openCCSID ucnv_openCCSID_3_8
+#define ucnv_openPackage ucnv_openPackage_3_8
+#define ucnv_openStandardNames ucnv_openStandardNames_3_8
+#define ucnv_openU ucnv_openU_3_8
+#define ucnv_reset ucnv_reset_3_8
+#define ucnv_resetFromUnicode ucnv_resetFromUnicode_3_8
+#define ucnv_resetToUnicode ucnv_resetToUnicode_3_8
+#define ucnv_safeClone ucnv_safeClone_3_8
+#define ucnv_setDefaultName ucnv_setDefaultName_3_8
+#define ucnv_setFallback ucnv_setFallback_3_8
+#define ucnv_setFromUCallBack ucnv_setFromUCallBack_3_8
+#define ucnv_setSubstChars ucnv_setSubstChars_3_8
+#define ucnv_setSubstString ucnv_setSubstString_3_8
+#define ucnv_setToUCallBack ucnv_setToUCallBack_3_8
+#define ucnv_swap ucnv_swap_3_8
+#define ucnv_swapAliases ucnv_swapAliases_3_8
+#define ucnv_toAlgorithmic ucnv_toAlgorithmic_3_8
+#define ucnv_toUChars ucnv_toUChars_3_8
+#define ucnv_toUCountPending ucnv_toUCountPending_3_8
+#define ucnv_toUWriteCodePoint ucnv_toUWriteCodePoint_3_8
+#define ucnv_toUWriteUChars ucnv_toUWriteUChars_3_8
+#define ucnv_toUnicode ucnv_toUnicode_3_8
+#define ucnv_unload ucnv_unload_3_8
+#define ucnv_unloadSharedDataIfReady ucnv_unloadSharedDataIfReady_3_8
+#define ucnv_usesFallback ucnv_usesFallback_3_8
+#define ucol_allocWeights ucol_allocWeights_3_8
+#define ucol_assembleTailoringTable ucol_assembleTailoringTable_3_8
+#define ucol_calcSortKey ucol_calcSortKey_3_8
+#define ucol_calcSortKeySimpleTertiary ucol_calcSortKeySimpleTertiary_3_8
+#define ucol_cloneBinary ucol_cloneBinary_3_8
+#define ucol_cloneRuleData ucol_cloneRuleData_3_8
+#define ucol_close ucol_close_3_8
+#define ucol_closeElements ucol_closeElements_3_8
+#define ucol_collatorToIdentifier ucol_collatorToIdentifier_3_8
+#define ucol_countAvailable ucol_countAvailable_3_8
+#define ucol_createElements ucol_createElements_3_8
+#define ucol_doCE ucol_doCE_3_8
+#define ucol_equal ucol_equal_3_8
+#define ucol_equals ucol_equals_3_8
+#define ucol_forgetUCA ucol_forgetUCA_3_8
+#define ucol_getAttribute ucol_getAttribute_3_8
+#define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_3_8
+#define ucol_getAvailable ucol_getAvailable_3_8
+#define ucol_getBound ucol_getBound_3_8
+#define ucol_getCEGenerator ucol_getCEGenerator_3_8
+#define ucol_getCEStrengthDifference ucol_getCEStrengthDifference_3_8
+#define ucol_getContractions ucol_getContractions_3_8
+#define ucol_getContractionsAndExpansions ucol_getContractionsAndExpansions_3_8
+#define ucol_getDisplayName ucol_getDisplayName_3_8
+#define ucol_getFirstCE ucol_getFirstCE_3_8
+#define ucol_getFunctionalEquivalent ucol_getFunctionalEquivalent_3_8
+#define ucol_getKeywordValues ucol_getKeywordValues_3_8
+#define ucol_getKeywords ucol_getKeywords_3_8
+#define ucol_getLocale ucol_getLocale_3_8
+#define ucol_getLocaleByType ucol_getLocaleByType_3_8
+#define ucol_getMaxExpansion ucol_getMaxExpansion_3_8
+#define ucol_getNextCE ucol_getNextCE_3_8
+#define ucol_getNextGenerated ucol_getNextGenerated_3_8
+#define ucol_getOffset ucol_getOffset_3_8
+#define ucol_getPrevCE ucol_getPrevCE_3_8
+#define ucol_getRules ucol_getRules_3_8
+#define ucol_getRulesEx ucol_getRulesEx_3_8
+#define ucol_getShortDefinitionString ucol_getShortDefinitionString_3_8
+#define ucol_getSimpleCEGenerator ucol_getSimpleCEGenerator_3_8
+#define ucol_getSortKey ucol_getSortKey_3_8
+#define ucol_getSortKeySize ucol_getSortKeySize_3_8
+#define ucol_getSortKeyWithAllocation ucol_getSortKeyWithAllocation_3_8
+#define ucol_getStrength ucol_getStrength_3_8
+#define ucol_getTailoredSet ucol_getTailoredSet_3_8
+#define ucol_getUCAVersion ucol_getUCAVersion_3_8
+#define ucol_getUnsafeSet ucol_getUnsafeSet_3_8
+#define ucol_getVariableTop ucol_getVariableTop_3_8
+#define ucol_getVersion ucol_getVersion_3_8
+#define ucol_greater ucol_greater_3_8
+#define ucol_greaterOrEqual ucol_greaterOrEqual_3_8
+#define ucol_identifierToShortString ucol_identifierToShortString_3_8
+#define ucol_initBuffers ucol_initBuffers_3_8
+#define ucol_initCollator ucol_initCollator_3_8
+#define ucol_initInverseUCA ucol_initInverseUCA_3_8
+#define ucol_initUCA ucol_initUCA_3_8
+#define ucol_inv_getGapPositions ucol_inv_getGapPositions_3_8
+#define ucol_inv_getNextCE ucol_inv_getNextCE_3_8
+#define ucol_inv_getPrevCE ucol_inv_getPrevCE_3_8
+#define ucol_isTailored ucol_isTailored_3_8
+#define ucol_keyHashCode ucol_keyHashCode_3_8
+#define ucol_mergeSortkeys ucol_mergeSortkeys_3_8
+#define ucol_next ucol_next_3_8
+#define ucol_nextSortKeyPart ucol_nextSortKeyPart_3_8
+#define ucol_nextWeight ucol_nextWeight_3_8
+#define ucol_normalizeShortDefinitionString ucol_normalizeShortDefinitionString_3_8
+#define ucol_open ucol_open_3_8
+#define ucol_openAvailableLocales ucol_openAvailableLocales_3_8
+#define ucol_openBinary ucol_openBinary_3_8
+#define ucol_openElements ucol_openElements_3_8
+#define ucol_openFromIdentifier ucol_openFromIdentifier_3_8
+#define ucol_openFromShortString ucol_openFromShortString_3_8
+#define ucol_openRules ucol_openRules_3_8
+#define ucol_open_internal ucol_open_internal_3_8
+#define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_3_8
+#define ucol_previous ucol_previous_3_8
+#define ucol_primaryOrder ucol_primaryOrder_3_8
+#define ucol_prv_getSpecialCE ucol_prv_getSpecialCE_3_8
+#define ucol_prv_getSpecialPrevCE ucol_prv_getSpecialPrevCE_3_8
+#define ucol_reset ucol_reset_3_8
+#define ucol_restoreVariableTop ucol_restoreVariableTop_3_8
+#define ucol_safeClone ucol_safeClone_3_8
+#define ucol_secondaryOrder ucol_secondaryOrder_3_8
+#define ucol_setAttribute ucol_setAttribute_3_8
+#define ucol_setOffset ucol_setOffset_3_8
+#define ucol_setOptionsFromHeader ucol_setOptionsFromHeader_3_8
+#define ucol_setReqValidLocales ucol_setReqValidLocales_3_8
+#define ucol_setStrength ucol_setStrength_3_8
+#define ucol_setText ucol_setText_3_8
+#define ucol_setVariableTop ucol_setVariableTop_3_8
+#define ucol_shortStringToIdentifier ucol_shortStringToIdentifier_3_8
+#define ucol_strcoll ucol_strcoll_3_8
+#define ucol_strcollIter ucol_strcollIter_3_8
+#define ucol_swap ucol_swap_3_8
+#define ucol_swapBinary ucol_swapBinary_3_8
+#define ucol_swapInverseUCA ucol_swapInverseUCA_3_8
+#define ucol_tertiaryOrder ucol_tertiaryOrder_3_8
+#define ucol_tok_assembleTokenList ucol_tok_assembleTokenList_3_8
+#define ucol_tok_closeTokenList ucol_tok_closeTokenList_3_8
+#define ucol_tok_getNextArgument ucol_tok_getNextArgument_3_8
+#define ucol_tok_initTokenList ucol_tok_initTokenList_3_8
+#define ucol_tok_parseNextToken ucol_tok_parseNextToken_3_8
+#define ucol_updateInternalState ucol_updateInternalState_3_8
+#define ucsdet_close ucsdet_close_3_8
+#define ucsdet_detect ucsdet_detect_3_8
+#define ucsdet_detectAll ucsdet_detectAll_3_8
+#define ucsdet_enableInputFilter ucsdet_enableInputFilter_3_8
+#define ucsdet_getAllDetectableCharsets ucsdet_getAllDetectableCharsets_3_8
+#define ucsdet_getConfidence ucsdet_getConfidence_3_8
+#define ucsdet_getLanguage ucsdet_getLanguage_3_8
+#define ucsdet_getName ucsdet_getName_3_8
+#define ucsdet_getUChars ucsdet_getUChars_3_8
+#define ucsdet_isInputFilterEnabled ucsdet_isInputFilterEnabled_3_8
+#define ucsdet_open ucsdet_open_3_8
+#define ucsdet_setDeclaredEncoding ucsdet_setDeclaredEncoding_3_8
+#define ucsdet_setText ucsdet_setText_3_8
+#define ucurr_forLocale ucurr_forLocale_3_8
+#define ucurr_getDefaultFractionDigits ucurr_getDefaultFractionDigits_3_8
+#define ucurr_getName ucurr_getName_3_8
+#define ucurr_getRoundingIncrement ucurr_getRoundingIncrement_3_8
+#define ucurr_openISOCurrencies ucurr_openISOCurrencies_3_8
+#define ucurr_register ucurr_register_3_8
+#define ucurr_unregister ucurr_unregister_3_8
+#define udat_applyPattern udat_applyPattern_3_8
+#define udat_clone udat_clone_3_8
+#define udat_close udat_close_3_8
+#define udat_countAvailable udat_countAvailable_3_8
+#define udat_countSymbols udat_countSymbols_3_8
+#define udat_format udat_format_3_8
+#define udat_get2DigitYearStart udat_get2DigitYearStart_3_8
+#define udat_getAvailable udat_getAvailable_3_8
+#define udat_getCalendar udat_getCalendar_3_8
+#define udat_getLocaleByType udat_getLocaleByType_3_8
+#define udat_getNumberFormat udat_getNumberFormat_3_8
+#define udat_getSymbols udat_getSymbols_3_8
+#define udat_isLenient udat_isLenient_3_8
+#define udat_open udat_open_3_8
+#define udat_parse udat_parse_3_8
+#define udat_parseCalendar udat_parseCalendar_3_8
+#define udat_set2DigitYearStart udat_set2DigitYearStart_3_8
+#define udat_setCalendar udat_setCalendar_3_8
+#define udat_setLenient udat_setLenient_3_8
+#define udat_setNumberFormat udat_setNumberFormat_3_8
+#define udat_setSymbols udat_setSymbols_3_8
+#define udat_toPattern udat_toPattern_3_8
+#define udata_checkCommonData udata_checkCommonData_3_8
+#define udata_close udata_close_3_8
+#define udata_closeSwapper udata_closeSwapper_3_8
+#define udata_getHeaderSize udata_getHeaderSize_3_8
+#define udata_getInfo udata_getInfo_3_8
+#define udata_getInfoSize udata_getInfoSize_3_8
+#define udata_getLength udata_getLength_3_8
+#define udata_getMemory udata_getMemory_3_8
+#define udata_getRawMemory udata_getRawMemory_3_8
+#define udata_open udata_open_3_8
+#define udata_openChoice udata_openChoice_3_8
+#define udata_openSwapper udata_openSwapper_3_8
+#define udata_openSwapperForInputData udata_openSwapperForInputData_3_8
+#define udata_printError udata_printError_3_8
+#define udata_readInt16 udata_readInt16_3_8
+#define udata_readInt32 udata_readInt32_3_8
+#define udata_setAppData udata_setAppData_3_8
+#define udata_setCommonData udata_setCommonData_3_8
+#define udata_setFileAccess udata_setFileAccess_3_8
+#define udata_swapDataHeader udata_swapDataHeader_3_8
+#define udata_swapInvStringBlock udata_swapInvStringBlock_3_8
+#define udatpg_addPattern udatpg_addPattern_3_8
+#define udatpg_clone udatpg_clone_3_8
+#define udatpg_close udatpg_close_3_8
+#define udatpg_getAppendItemFormat udatpg_getAppendItemFormat_3_8
+#define udatpg_getAppendItemName udatpg_getAppendItemName_3_8
+#define udatpg_getBaseSkeleton udatpg_getBaseSkeleton_3_8
+#define udatpg_getBestPattern udatpg_getBestPattern_3_8
+#define udatpg_getDateTimeFormat udatpg_getDateTimeFormat_3_8
+#define udatpg_getDecimal udatpg_getDecimal_3_8
+#define udatpg_getPatternForSkeleton udatpg_getPatternForSkeleton_3_8
+#define udatpg_getSkeleton udatpg_getSkeleton_3_8
+#define udatpg_open udatpg_open_3_8
+#define udatpg_openBaseSkeletons udatpg_openBaseSkeletons_3_8
+#define udatpg_openEmpty udatpg_openEmpty_3_8
+#define udatpg_openSkeletons udatpg_openSkeletons_3_8
+#define udatpg_replaceFieldTypes udatpg_replaceFieldTypes_3_8
+#define udatpg_setAppendItemFormat udatpg_setAppendItemFormat_3_8
+#define udatpg_setAppendItemName udatpg_setAppendItemName_3_8
+#define udatpg_setDateTimeFormat udatpg_setDateTimeFormat_3_8
+#define udatpg_setDecimal udatpg_setDecimal_3_8
+#define uenum_close uenum_close_3_8
+#define uenum_count uenum_count_3_8
+#define uenum_next uenum_next_3_8
+#define uenum_nextDefault uenum_nextDefault_3_8
+#define uenum_openCharStringsEnumeration uenum_openCharStringsEnumeration_3_8
+#define uenum_openStringEnumeration uenum_openStringEnumeration_3_8
+#define uenum_reset uenum_reset_3_8
+#define uenum_unext uenum_unext_3_8
+#define uenum_unextDefault uenum_unextDefault_3_8
+#define ufile_close_translit ufile_close_translit_3_8
+#define ufile_fill_uchar_buffer ufile_fill_uchar_buffer_3_8
+#define ufile_flush_translit ufile_flush_translit_3_8
+#define ufile_getch ufile_getch_3_8
+#define ufile_getch32 ufile_getch32_3_8
+#define ufmt_64tou ufmt_64tou_3_8
+#define ufmt_defaultCPToUnicode ufmt_defaultCPToUnicode_3_8
+#define ufmt_digitvalue ufmt_digitvalue_3_8
+#define ufmt_isdigit ufmt_isdigit_3_8
+#define ufmt_ptou ufmt_ptou_3_8
+#define ufmt_uto64 ufmt_uto64_3_8
+#define ufmt_utop ufmt_utop_3_8
+#define uhash_close uhash_close_3_8
+#define uhash_compareCaselessUnicodeString uhash_compareCaselessUnicodeString_3_8
+#define uhash_compareChars uhash_compareChars_3_8
+#define uhash_compareIChars uhash_compareIChars_3_8
+#define uhash_compareLong uhash_compareLong_3_8
+#define uhash_compareUChars uhash_compareUChars_3_8
+#define uhash_compareUnicodeString uhash_compareUnicodeString_3_8
+#define uhash_count uhash_count_3_8
+#define uhash_deleteHashtable uhash_deleteHashtable_3_8
+#define uhash_deleteUVector uhash_deleteUVector_3_8
+#define uhash_deleteUnicodeString uhash_deleteUnicodeString_3_8
+#define uhash_equals uhash_equals_3_8
+#define uhash_find uhash_find_3_8
+#define uhash_freeBlock uhash_freeBlock_3_8
+#define uhash_get uhash_get_3_8
+#define uhash_geti uhash_geti_3_8
+#define uhash_hashCaselessUnicodeString uhash_hashCaselessUnicodeString_3_8
+#define uhash_hashChars uhash_hashChars_3_8
+#define uhash_hashIChars uhash_hashIChars_3_8
+#define uhash_hashLong uhash_hashLong_3_8
+#define uhash_hashUChars uhash_hashUChars_3_8
+#define uhash_hashUCharsN uhash_hashUCharsN_3_8
+#define uhash_hashUnicodeString uhash_hashUnicodeString_3_8
+#define uhash_iget uhash_iget_3_8
+#define uhash_igeti uhash_igeti_3_8
+#define uhash_init uhash_init_3_8
+#define uhash_iput uhash_iput_3_8
+#define uhash_iputi uhash_iputi_3_8
+#define uhash_iremove uhash_iremove_3_8
+#define uhash_iremovei uhash_iremovei_3_8
+#define uhash_nextElement uhash_nextElement_3_8
+#define uhash_open uhash_open_3_8
+#define uhash_openSize uhash_openSize_3_8
+#define uhash_put uhash_put_3_8
+#define uhash_puti uhash_puti_3_8
+#define uhash_remove uhash_remove_3_8
+#define uhash_removeAll uhash_removeAll_3_8
+#define uhash_removeElement uhash_removeElement_3_8
+#define uhash_removei uhash_removei_3_8
+#define uhash_setKeyComparator uhash_setKeyComparator_3_8
+#define uhash_setKeyDeleter uhash_setKeyDeleter_3_8
+#define uhash_setKeyHasher uhash_setKeyHasher_3_8
+#define uhash_setResizePolicy uhash_setResizePolicy_3_8
+#define uhash_setValueComparator uhash_setValueComparator_3_8
+#define uhash_setValueDeleter uhash_setValueDeleter_3_8
+#define uhst_addPropertyStarts uhst_addPropertyStarts_3_8
+#define uidna_IDNToASCII uidna_IDNToASCII_3_8
+#define uidna_IDNToUnicode uidna_IDNToUnicode_3_8
+#define uidna_compare uidna_compare_3_8
+#define uidna_toASCII uidna_toASCII_3_8
+#define uidna_toUnicode uidna_toUnicode_3_8
+#define uiter_current32 uiter_current32_3_8
+#define uiter_getState uiter_getState_3_8
+#define uiter_next32 uiter_next32_3_8
+#define uiter_previous32 uiter_previous32_3_8
+#define uiter_setCharacterIterator uiter_setCharacterIterator_3_8
+#define uiter_setReplaceable uiter_setReplaceable_3_8
+#define uiter_setState uiter_setState_3_8
+#define uiter_setString uiter_setString_3_8
+#define uiter_setUTF16BE uiter_setUTF16BE_3_8
+#define uiter_setUTF8 uiter_setUTF8_3_8
+#define uloc_acceptLanguage uloc_acceptLanguage_3_8
+#define uloc_acceptLanguageFromHTTP uloc_acceptLanguageFromHTTP_3_8
+#define uloc_canonicalize uloc_canonicalize_3_8
+#define uloc_countAvailable uloc_countAvailable_3_8
+#define uloc_getAvailable uloc_getAvailable_3_8
+#define uloc_getBaseName uloc_getBaseName_3_8
+#define uloc_getCountry uloc_getCountry_3_8
+#define uloc_getDefault uloc_getDefault_3_8
+#define uloc_getDisplayCountry uloc_getDisplayCountry_3_8
+#define uloc_getDisplayKeyword uloc_getDisplayKeyword_3_8
+#define uloc_getDisplayKeywordValue uloc_getDisplayKeywordValue_3_8
+#define uloc_getDisplayLanguage uloc_getDisplayLanguage_3_8
+#define uloc_getDisplayName uloc_getDisplayName_3_8
+#define uloc_getDisplayScript uloc_getDisplayScript_3_8
+#define uloc_getDisplayVariant uloc_getDisplayVariant_3_8
+#define uloc_getISO3Country uloc_getISO3Country_3_8
+#define uloc_getISO3Language uloc_getISO3Language_3_8
+#define uloc_getISOCountries uloc_getISOCountries_3_8
+#define uloc_getISOLanguages uloc_getISOLanguages_3_8
+#define uloc_getKeywordValue uloc_getKeywordValue_3_8
+#define uloc_getLCID uloc_getLCID_3_8
+#define uloc_getLanguage uloc_getLanguage_3_8
+#define uloc_getLocaleForLCID uloc_getLocaleForLCID_3_8
+#define uloc_getName uloc_getName_3_8
+#define uloc_getParent uloc_getParent_3_8
+#define uloc_getScript uloc_getScript_3_8
+#define uloc_getVariant uloc_getVariant_3_8
+#define uloc_openKeywordList uloc_openKeywordList_3_8
+#define uloc_openKeywords uloc_openKeywords_3_8
+#define uloc_setDefault uloc_setDefault_3_8
+#define uloc_setKeywordValue uloc_setKeywordValue_3_8
+#define ulocdata_close ulocdata_close_3_8
+#define ulocdata_getDelimiter ulocdata_getDelimiter_3_8
+#define ulocdata_getExemplarSet ulocdata_getExemplarSet_3_8
+#define ulocdata_getMeasurementSystem ulocdata_getMeasurementSystem_3_8
+#define ulocdata_getNoSubstitute ulocdata_getNoSubstitute_3_8
+#define ulocdata_getPaperSize ulocdata_getPaperSize_3_8
+#define ulocdata_open ulocdata_open_3_8
+#define ulocdata_setNoSubstitute ulocdata_setNoSubstitute_3_8
+#define umsg_applyPattern umsg_applyPattern_3_8
+#define umsg_autoQuoteApostrophe umsg_autoQuoteApostrophe_3_8
+#define umsg_clone umsg_clone_3_8
+#define umsg_close umsg_close_3_8
+#define umsg_format umsg_format_3_8
+#define umsg_getLocale umsg_getLocale_3_8
+#define umsg_open umsg_open_3_8
+#define umsg_parse umsg_parse_3_8
+#define umsg_setLocale umsg_setLocale_3_8
+#define umsg_toPattern umsg_toPattern_3_8
+#define umsg_vformat umsg_vformat_3_8
+#define umsg_vparse umsg_vparse_3_8
+#define umtx_atomic_dec umtx_atomic_dec_3_8
+#define umtx_atomic_inc umtx_atomic_inc_3_8
+#define umtx_cleanup umtx_cleanup_3_8
+#define umtx_destroy umtx_destroy_3_8
+#define umtx_init umtx_init_3_8
+#define umtx_lock umtx_lock_3_8
+#define umtx_unlock umtx_unlock_3_8
+#define unorm_addPropertyStarts unorm_addPropertyStarts_3_8
+#define unorm_closeIter unorm_closeIter_3_8
+#define unorm_compare unorm_compare_3_8
+#define unorm_compose unorm_compose_3_8
+#define unorm_concatenate unorm_concatenate_3_8
+#define unorm_decompose unorm_decompose_3_8
+#define unorm_getCanonStartSet unorm_getCanonStartSet_3_8
+#define unorm_getCanonicalDecomposition unorm_getCanonicalDecomposition_3_8
+#define unorm_getDecomposition unorm_getDecomposition_3_8
+#define unorm_getFCD16FromCodePoint unorm_getFCD16FromCodePoint_3_8
+#define unorm_getFCDTrie unorm_getFCDTrie_3_8
+#define unorm_getNX unorm_getNX_3_8
+#define unorm_getQuickCheck unorm_getQuickCheck_3_8
+#define unorm_getUnicodeVersion unorm_getUnicodeVersion_3_8
+#define unorm_haveData unorm_haveData_3_8
+#define unorm_internalIsFullCompositionExclusion unorm_internalIsFullCompositionExclusion_3_8
+#define unorm_internalNormalize unorm_internalNormalize_3_8
+#define unorm_internalNormalizeWithNX unorm_internalNormalizeWithNX_3_8
+#define unorm_internalQuickCheck unorm_internalQuickCheck_3_8
+#define unorm_isCanonSafeStart unorm_isCanonSafeStart_3_8
+#define unorm_isNFSkippable unorm_isNFSkippable_3_8
+#define unorm_isNormalized unorm_isNormalized_3_8
+#define unorm_isNormalizedWithOptions unorm_isNormalizedWithOptions_3_8
+#define unorm_next unorm_next_3_8
+#define unorm_normalize unorm_normalize_3_8
+#define unorm_openIter unorm_openIter_3_8
+#define unorm_previous unorm_previous_3_8
+#define unorm_quickCheck unorm_quickCheck_3_8
+#define unorm_quickCheckWithOptions unorm_quickCheckWithOptions_3_8
+#define unorm_setIter unorm_setIter_3_8
+#define unum_applyPattern unum_applyPattern_3_8
+#define unum_clone unum_clone_3_8
+#define unum_close unum_close_3_8
+#define unum_countAvailable unum_countAvailable_3_8
+#define unum_format unum_format_3_8
+#define unum_formatDouble unum_formatDouble_3_8
+#define unum_formatDoubleCurrency unum_formatDoubleCurrency_3_8
+#define unum_formatInt64 unum_formatInt64_3_8
+#define unum_getAttribute unum_getAttribute_3_8
+#define unum_getAvailable unum_getAvailable_3_8
+#define unum_getDoubleAttribute unum_getDoubleAttribute_3_8
+#define unum_getLocaleByType unum_getLocaleByType_3_8
+#define unum_getSymbol unum_getSymbol_3_8
+#define unum_getTextAttribute unum_getTextAttribute_3_8
+#define unum_open unum_open_3_8
+#define unum_parse unum_parse_3_8
+#define unum_parseDouble unum_parseDouble_3_8
+#define unum_parseDoubleCurrency unum_parseDoubleCurrency_3_8
+#define unum_parseInt64 unum_parseInt64_3_8
+#define unum_setAttribute unum_setAttribute_3_8
+#define unum_setDoubleAttribute unum_setDoubleAttribute_3_8
+#define unum_setSymbol unum_setSymbol_3_8
+#define unum_setTextAttribute unum_setTextAttribute_3_8
+#define unum_toPattern unum_toPattern_3_8
+#define upname_swap upname_swap_3_8
+#define uprops_getSource uprops_getSource_3_8
+#define upropsvec_addPropertyStarts upropsvec_addPropertyStarts_3_8
+#define uprv_asciiFromEbcdic uprv_asciiFromEbcdic_3_8
+#define uprv_asciitolower uprv_asciitolower_3_8
+#define uprv_ceil uprv_ceil_3_8
+#define uprv_cnttab_addContraction uprv_cnttab_addContraction_3_8
+#define uprv_cnttab_changeContraction uprv_cnttab_changeContraction_3_8
+#define uprv_cnttab_changeLastCE uprv_cnttab_changeLastCE_3_8
+#define uprv_cnttab_clone uprv_cnttab_clone_3_8
+#define uprv_cnttab_close uprv_cnttab_close_3_8
+#define uprv_cnttab_constructTable uprv_cnttab_constructTable_3_8
+#define uprv_cnttab_findCE uprv_cnttab_findCE_3_8
+#define uprv_cnttab_findCP uprv_cnttab_findCP_3_8
+#define uprv_cnttab_getCE uprv_cnttab_getCE_3_8
+#define uprv_cnttab_insertContraction uprv_cnttab_insertContraction_3_8
+#define uprv_cnttab_isTailored uprv_cnttab_isTailored_3_8
+#define uprv_cnttab_open uprv_cnttab_open_3_8
+#define uprv_cnttab_setContraction uprv_cnttab_setContraction_3_8
+#define uprv_compareASCIIPropertyNames uprv_compareASCIIPropertyNames_3_8
+#define uprv_compareEBCDICPropertyNames uprv_compareEBCDICPropertyNames_3_8
+#define uprv_compareInvAscii uprv_compareInvAscii_3_8
+#define uprv_compareInvEbcdic uprv_compareInvEbcdic_3_8
+#define uprv_convertToLCID uprv_convertToLCID_3_8
+#define uprv_convertToPosix uprv_convertToPosix_3_8
+#define uprv_copyAscii uprv_copyAscii_3_8
+#define uprv_copyEbcdic uprv_copyEbcdic_3_8
+#define uprv_ebcdicFromAscii uprv_ebcdicFromAscii_3_8
+#define uprv_ebcdictolower uprv_ebcdictolower_3_8
+#define uprv_fabs uprv_fabs_3_8
+#define uprv_floor uprv_floor_3_8
+#define uprv_fmax uprv_fmax_3_8
+#define uprv_fmin uprv_fmin_3_8
+#define uprv_fmod uprv_fmod_3_8
+#define uprv_free uprv_free_3_8
+#define uprv_getCharNameCharacters uprv_getCharNameCharacters_3_8
+#define uprv_getDefaultCodepage uprv_getDefaultCodepage_3_8
+#define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_3_8
+#define uprv_getInfinity uprv_getInfinity_3_8
+#define uprv_getMaxCharNameLength uprv_getMaxCharNameLength_3_8
+#define uprv_getMaxValues uprv_getMaxValues_3_8
+#define uprv_getNaN uprv_getNaN_3_8
+#define uprv_getStaticCurrencyName uprv_getStaticCurrencyName_3_8
+#define uprv_getUTCtime uprv_getUTCtime_3_8
+#define uprv_haveProperties uprv_haveProperties_3_8
+#define uprv_init_collIterate uprv_init_collIterate_3_8
+#define uprv_int32Comparator uprv_int32Comparator_3_8
+#define uprv_isInfinite uprv_isInfinite_3_8
+#define uprv_isInvariantString uprv_isInvariantString_3_8
+#define uprv_isInvariantUString uprv_isInvariantUString_3_8
+#define uprv_isNaN uprv_isNaN_3_8
+#define uprv_isNegativeInfinity uprv_isNegativeInfinity_3_8
+#define uprv_isPositiveInfinity uprv_isPositiveInfinity_3_8
+#define uprv_isRuleWhiteSpace uprv_isRuleWhiteSpace_3_8
+#define uprv_itou uprv_itou_3_8
+#define uprv_log uprv_log_3_8
+#define uprv_malloc uprv_malloc_3_8
+#define uprv_mapFile uprv_mapFile_3_8
+#define uprv_max uprv_max_3_8
+#define uprv_maximumPtr uprv_maximumPtr_3_8
+#define uprv_maxMantissa uprv_maxMantissa_3_8
+#define uprv_min uprv_min_3_8
+#define uprv_modf uprv_modf_3_8
+#define uprv_openRuleWhiteSpaceSet uprv_openRuleWhiteSpaceSet_3_8
+#define uprv_parseCurrency uprv_parseCurrency_3_8
+#define uprv_pathIsAbsolute uprv_pathIsAbsolute_3_8
+#define uprv_pow uprv_pow_3_8
+#define uprv_pow10 uprv_pow10_3_8
+#define uprv_realloc uprv_realloc_3_8
+#define uprv_round uprv_round_3_8
+#define uprv_sortArray uprv_sortArray_3_8
+#define uprv_strCompare uprv_strCompare_3_8
+#define uprv_strdup uprv_strdup_3_8
+#define uprv_strndup uprv_strndup_3_8
+#define uprv_syntaxError uprv_syntaxError_3_8
+#define uprv_timezone uprv_timezone_3_8
+#define uprv_toupper uprv_toupper_3_8
+#define uprv_trunc uprv_trunc_3_8
+#define uprv_tzname uprv_tzname_3_8
+#define uprv_tzset uprv_tzset_3_8
+#define uprv_uca_addAnElement uprv_uca_addAnElement_3_8
+#define uprv_uca_assembleTable uprv_uca_assembleTable_3_8
+#define uprv_uca_canonicalClosure uprv_uca_canonicalClosure_3_8
+#define uprv_uca_cloneTempTable uprv_uca_cloneTempTable_3_8
+#define uprv_uca_closeTempTable uprv_uca_closeTempTable_3_8
+#define uprv_uca_getCodePointFromRaw uprv_uca_getCodePointFromRaw_3_8
+#define uprv_uca_getImplicitFromRaw uprv_uca_getImplicitFromRaw_3_8
+#define uprv_uca_getRawFromCodePoint uprv_uca_getRawFromCodePoint_3_8
+#define uprv_uca_getRawFromImplicit uprv_uca_getRawFromImplicit_3_8
+#define uprv_uca_initImplicitConstants uprv_uca_initImplicitConstants_3_8
+#define uprv_uca_initTempTable uprv_uca_initTempTable_3_8
+#define uprv_uint16Comparator uprv_uint16Comparator_3_8
+#define uprv_uint32Comparator uprv_uint32Comparator_3_8
+#define uprv_unmapFile uprv_unmapFile_3_8
+#define uregex_appendReplacement uregex_appendReplacement_3_8
+#define uregex_appendTail uregex_appendTail_3_8
+#define uregex_clone uregex_clone_3_8
+#define uregex_close uregex_close_3_8
+#define uregex_end uregex_end_3_8
+#define uregex_find uregex_find_3_8
+#define uregex_findNext uregex_findNext_3_8
+#define uregex_flags uregex_flags_3_8
+#define uregex_getText uregex_getText_3_8
+#define uregex_group uregex_group_3_8
+#define uregex_groupCount uregex_groupCount_3_8
+#define uregex_lookingAt uregex_lookingAt_3_8
+#define uregex_matches uregex_matches_3_8
+#define uregex_open uregex_open_3_8
+#define uregex_openC uregex_openC_3_8
+#define uregex_pattern uregex_pattern_3_8
+#define uregex_replaceAll uregex_replaceAll_3_8
+#define uregex_replaceFirst uregex_replaceFirst_3_8
+#define uregex_reset uregex_reset_3_8
+#define uregex_setText uregex_setText_3_8
+#define uregex_split uregex_split_3_8
+#define uregex_start uregex_start_3_8
+#define ures_clone ures_clone_3_8
+#define ures_close ures_close_3_8
+#define ures_copyResb ures_copyResb_3_8
+#define ures_countArrayItems ures_countArrayItems_3_8
+#define ures_equal ures_equal_3_8
+#define ures_findResource ures_findResource_3_8
+#define ures_findSubResource ures_findSubResource_3_8
+#define ures_getBinary ures_getBinary_3_8
+#define ures_getByIndex ures_getByIndex_3_8
+#define ures_getByKey ures_getByKey_3_8
+#define ures_getByKeyWithFallback ures_getByKeyWithFallback_3_8
+#define ures_getFunctionalEquivalent ures_getFunctionalEquivalent_3_8
+#define ures_getInt ures_getInt_3_8
+#define ures_getIntVector ures_getIntVector_3_8
+#define ures_getKey ures_getKey_3_8
+#define ures_getKeywordValues ures_getKeywordValues_3_8
+#define ures_getLocale ures_getLocale_3_8
+#define ures_getLocaleByType ures_getLocaleByType_3_8
+#define ures_getName ures_getName_3_8
+#define ures_getNextResource ures_getNextResource_3_8
+#define ures_getNextString ures_getNextString_3_8
+#define ures_getParentBundle ures_getParentBundle_3_8
+#define ures_getSize ures_getSize_3_8
+#define ures_getString ures_getString_3_8
+#define ures_getStringByIndex ures_getStringByIndex_3_8
+#define ures_getStringByKey ures_getStringByKey_3_8
+#define ures_getStringByKeyWithFallback ures_getStringByKeyWithFallback_3_8
+#define ures_getType ures_getType_3_8
+#define ures_getUInt ures_getUInt_3_8
+#define ures_getUTF8String ures_getUTF8String_3_8
+#define ures_getUTF8StringByIndex ures_getUTF8StringByIndex_3_8
+#define ures_getUTF8StringByKey ures_getUTF8StringByKey_3_8
+#define ures_getVersion ures_getVersion_3_8
+#define ures_getVersionNumber ures_getVersionNumber_3_8
+#define ures_hasNext ures_hasNext_3_8
+#define ures_initStackObject ures_initStackObject_3_8
+#define ures_open ures_open_3_8
+#define ures_openAvailableLocales ures_openAvailableLocales_3_8
+#define ures_openDirect ures_openDirect_3_8
+#define ures_openFillIn ures_openFillIn_3_8
+#define ures_openU ures_openU_3_8
+#define ures_resetIterator ures_resetIterator_3_8
+#define ures_swap ures_swap_3_8
+#define uscript_closeRun uscript_closeRun_3_8
+#define uscript_getCode uscript_getCode_3_8
+#define uscript_getName uscript_getName_3_8
+#define uscript_getScript uscript_getScript_3_8
+#define uscript_getShortName uscript_getShortName_3_8
+#define uscript_nextRun uscript_nextRun_3_8
+#define uscript_openRun uscript_openRun_3_8
+#define uscript_resetRun uscript_resetRun_3_8
+#define uscript_setRunText uscript_setRunText_3_8
+#define usearch_close usearch_close_3_8
+#define usearch_first usearch_first_3_8
+#define usearch_following usearch_following_3_8
+#define usearch_getAttribute usearch_getAttribute_3_8
+#define usearch_getBreakIterator usearch_getBreakIterator_3_8
+#define usearch_getCollator usearch_getCollator_3_8
+#define usearch_getMatchedLength usearch_getMatchedLength_3_8
+#define usearch_getMatchedStart usearch_getMatchedStart_3_8
+#define usearch_getMatchedText usearch_getMatchedText_3_8
+#define usearch_getOffset usearch_getOffset_3_8
+#define usearch_getPattern usearch_getPattern_3_8
+#define usearch_getText usearch_getText_3_8
+#define usearch_handleNextCanonical usearch_handleNextCanonical_3_8
+#define usearch_handleNextExact usearch_handleNextExact_3_8
+#define usearch_handlePreviousCanonical usearch_handlePreviousCanonical_3_8
+#define usearch_handlePreviousExact usearch_handlePreviousExact_3_8
+#define usearch_last usearch_last_3_8
+#define usearch_next usearch_next_3_8
+#define usearch_open usearch_open_3_8
+#define usearch_openFromCollator usearch_openFromCollator_3_8
+#define usearch_preceding usearch_preceding_3_8
+#define usearch_previous usearch_previous_3_8
+#define usearch_reset usearch_reset_3_8
+#define usearch_setAttribute usearch_setAttribute_3_8
+#define usearch_setBreakIterator usearch_setBreakIterator_3_8
+#define usearch_setCollator usearch_setCollator_3_8
+#define usearch_setOffset usearch_setOffset_3_8
+#define usearch_setPattern usearch_setPattern_3_8
+#define usearch_setText usearch_setText_3_8
+#define uset_add uset_add_3_8
+#define uset_addAll uset_addAll_3_8
+#define uset_addAllCodePoints uset_addAllCodePoints_3_8
+#define uset_addRange uset_addRange_3_8
+#define uset_addString uset_addString_3_8
+#define uset_applyIntPropertyValue uset_applyIntPropertyValue_3_8
+#define uset_applyPattern uset_applyPattern_3_8
+#define uset_applyPropertyAlias uset_applyPropertyAlias_3_8
+#define uset_charAt uset_charAt_3_8
+#define uset_clear uset_clear_3_8
+#define uset_clone uset_clone_3_8
+#define uset_cloneAsThawed uset_cloneAsThawed_3_8
+#define uset_close uset_close_3_8
+#define uset_compact uset_compact_3_8
+#define uset_complement uset_complement_3_8
+#define uset_complementAll uset_complementAll_3_8
+#define uset_contains uset_contains_3_8
+#define uset_containsAll uset_containsAll_3_8
+#define uset_containsAllCodePoints uset_containsAllCodePoints_3_8
+#define uset_containsNone uset_containsNone_3_8
+#define uset_containsRange uset_containsRange_3_8
+#define uset_containsSome uset_containsSome_3_8
+#define uset_containsString uset_containsString_3_8
+#define uset_equals uset_equals_3_8
+#define uset_freeze uset_freeze_3_8
+#define uset_getItem uset_getItem_3_8
+#define uset_getItemCount uset_getItemCount_3_8
+#define uset_getSerializedRange uset_getSerializedRange_3_8
+#define uset_getSerializedRangeCount uset_getSerializedRangeCount_3_8
+#define uset_getSerializedSet uset_getSerializedSet_3_8
+#define uset_indexOf uset_indexOf_3_8
+#define uset_isEmpty uset_isEmpty_3_8
+#define uset_isFrozen uset_isFrozen_3_8
+#define uset_open uset_open_3_8
+#define uset_openPattern uset_openPattern_3_8
+#define uset_openPatternOptions uset_openPatternOptions_3_8
+#define uset_remove uset_remove_3_8
+#define uset_removeAll uset_removeAll_3_8
+#define uset_removeRange uset_removeRange_3_8
+#define uset_removeString uset_removeString_3_8
+#define uset_resemblesPattern uset_resemblesPattern_3_8
+#define uset_retain uset_retain_3_8
+#define uset_retainAll uset_retainAll_3_8
+#define uset_serialize uset_serialize_3_8
+#define uset_serializedContains uset_serializedContains_3_8
+#define uset_set uset_set_3_8
+#define uset_setSerializedToOne uset_setSerializedToOne_3_8
+#define uset_size uset_size_3_8
+#define uset_span uset_span_3_8
+#define uset_spanBack uset_spanBack_3_8
+#define uset_spanBackUTF8 uset_spanBackUTF8_3_8
+#define uset_spanUTF8 uset_spanUTF8_3_8
+#define uset_toPattern uset_toPattern_3_8
+#define usprep_close usprep_close_3_8
+#define usprep_open usprep_open_3_8
+#define usprep_prepare usprep_prepare_3_8
+#define usprep_swap usprep_swap_3_8
+#define ustr_foldCase ustr_foldCase_3_8
+#define ustr_toLower ustr_toLower_3_8
+#define ustr_toTitle ustr_toTitle_3_8
+#define ustr_toUpper ustr_toUpper_3_8
+#define utext_char32At utext_char32At_3_8
+#define utext_clone utext_clone_3_8
+#define utext_close utext_close_3_8
+#define utext_copy utext_copy_3_8
+#define utext_current32 utext_current32_3_8
+#define utext_equals utext_equals_3_8
+#define utext_extract utext_extract_3_8
+#define utext_freeze utext_freeze_3_8
+#define utext_getNativeIndex utext_getNativeIndex_3_8
+#define utext_getPreviousNativeIndex utext_getPreviousNativeIndex_3_8
+#define utext_hasMetaData utext_hasMetaData_3_8
+#define utext_isLengthExpensive utext_isLengthExpensive_3_8
+#define utext_isWritable utext_isWritable_3_8
+#define utext_moveIndex32 utext_moveIndex32_3_8
+#define utext_nativeLength utext_nativeLength_3_8
+#define utext_next32 utext_next32_3_8
+#define utext_next32From utext_next32From_3_8
+#define utext_openCharacterIterator utext_openCharacterIterator_3_8
+#define utext_openConstUnicodeString utext_openConstUnicodeString_3_8
+#define utext_openReplaceable utext_openReplaceable_3_8
+#define utext_openUChars utext_openUChars_3_8
+#define utext_openUTF8 utext_openUTF8_3_8
+#define utext_openUnicodeString utext_openUnicodeString_3_8
+#define utext_previous32 utext_previous32_3_8
+#define utext_previous32From utext_previous32From_3_8
+#define utext_replace utext_replace_3_8
+#define utext_setNativeIndex utext_setNativeIndex_3_8
+#define utext_setup utext_setup_3_8
+#define utf8_appendCharSafeBody utf8_appendCharSafeBody_3_8
+#define utf8_back1SafeBody utf8_back1SafeBody_3_8
+#define utf8_countTrailBytes utf8_countTrailBytes_3_8
+#define utf8_nextCharSafeBody utf8_nextCharSafeBody_3_8
+#define utf8_prevCharSafeBody utf8_prevCharSafeBody_3_8
+#define utmscale_fromInt64 utmscale_fromInt64_3_8
+#define utmscale_getTimeScaleValue utmscale_getTimeScaleValue_3_8
+#define utmscale_toInt64 utmscale_toInt64_3_8
+#define utrace_cleanup utrace_cleanup_3_8
+#define utrace_data utrace_data_3_8
+#define utrace_entry utrace_entry_3_8
+#define utrace_exit utrace_exit_3_8
+#define utrace_format utrace_format_3_8
+#define utrace_functionName utrace_functionName_3_8
+#define utrace_getFunctions utrace_getFunctions_3_8
+#define utrace_getLevel utrace_getLevel_3_8
+#define utrace_level utrace_level_3_8
+#define utrace_setFunctions utrace_setFunctions_3_8
+#define utrace_setLevel utrace_setLevel_3_8
+#define utrace_vformat utrace_vformat_3_8
+#define utrans_clone utrans_clone_3_8
+#define utrans_close utrans_close_3_8
+#define utrans_countAvailableIDs utrans_countAvailableIDs_3_8
+#define utrans_getAvailableID utrans_getAvailableID_3_8
+#define utrans_getID utrans_getID_3_8
+#define utrans_getUnicodeID utrans_getUnicodeID_3_8
+#define utrans_open utrans_open_3_8
+#define utrans_openIDs utrans_openIDs_3_8
+#define utrans_openInverse utrans_openInverse_3_8
+#define utrans_openU utrans_openU_3_8
+#define utrans_register utrans_register_3_8
+#define utrans_rep_caseContextIterator utrans_rep_caseContextIterator_3_8
+#define utrans_setFilter utrans_setFilter_3_8
+#define utrans_stripRules utrans_stripRules_3_8
+#define utrans_trans utrans_trans_3_8
+#define utrans_transIncremental utrans_transIncremental_3_8
+#define utrans_transIncrementalUChars utrans_transIncrementalUChars_3_8
+#define utrans_transUChars utrans_transUChars_3_8
+#define utrans_unregister utrans_unregister_3_8
+#define utrans_unregisterID utrans_unregisterID_3_8
+#define utrie_clone utrie_clone_3_8
+#define utrie_close utrie_close_3_8
+#define utrie_defaultGetFoldingOffset utrie_defaultGetFoldingOffset_3_8
+#define utrie_enum utrie_enum_3_8
+#define utrie_get32 utrie_get32_3_8
+#define utrie_getData utrie_getData_3_8
+#define utrie_open utrie_open_3_8
+#define utrie_serialize utrie_serialize_3_8
+#define utrie_set32 utrie_set32_3_8
+#define utrie_setRange32 utrie_setRange32_3_8
+#define utrie_swap utrie_swap_3_8
+#define utrie_unserialize utrie_unserialize_3_8
+#define utrie_unserializeDummy utrie_unserializeDummy_3_8
+/* C++ class names renaming defines: each name gets the _3_8 suffix; active only when XP_CPLUSPLUS is defined and U_HAVE_NAMESPACE is 0 */
+
+#ifdef XP_CPLUSPLUS
+#if !U_HAVE_NAMESPACE
+
+#define AbsoluteValueSubstitution AbsoluteValueSubstitution_3_8
+#define AlternateSubstitutionSubtable AlternateSubstitutionSubtable_3_8
+#define AnchorTable AnchorTable_3_8
+#define AnnualTimeZoneRule AnnualTimeZoneRule_3_8
+#define AnyTransliterator AnyTransliterator_3_8
+#define ArabicOpenTypeLayoutEngine ArabicOpenTypeLayoutEngine_3_8
+#define ArabicShaping ArabicShaping_3_8
+#define BMPSet BMPSet_3_8
+#define BasicCalendarFactory BasicCalendarFactory_3_8
+#define BasicTimeZone BasicTimeZone_3_8
+#define BinarySearchLookupTable BinarySearchLookupTable_3_8
+#define BreakIterator BreakIterator_3_8
+#define BuddhistCalendar BuddhistCalendar_3_8
+#define BuildCompactTrieHorizontalNode BuildCompactTrieHorizontalNode_3_8
+#define BuildCompactTrieNode BuildCompactTrieNode_3_8
+#define BuildCompactTrieVerticalNode BuildCompactTrieVerticalNode_3_8
+#define CFactory CFactory_3_8
+#define Calendar Calendar_3_8
+#define CalendarAstronomer CalendarAstronomer_3_8
+#define CalendarCache CalendarCache_3_8
+#define CalendarData CalendarData_3_8
+#define CalendarService CalendarService_3_8
+#define CanonMarkFilter CanonMarkFilter_3_8
+#define CanonShaping CanonShaping_3_8
+#define CanonicalIterator CanonicalIterator_3_8
+#define CaseMapTransliterator CaseMapTransliterator_3_8
+#define ChainingContextualSubstitutionFormat1Subtable ChainingContextualSubstitutionFormat1Subtable_3_8
+#define ChainingContextualSubstitutionFormat2Subtable ChainingContextualSubstitutionFormat2Subtable_3_8
+#define ChainingContextualSubstitutionFormat3Subtable ChainingContextualSubstitutionFormat3Subtable_3_8
+#define ChainingContextualSubstitutionSubtable ChainingContextualSubstitutionSubtable_3_8
+#define CharSubstitutionFilter CharSubstitutionFilter_3_8
+#define CharacterIterator CharacterIterator_3_8
+#define CharsetDetector CharsetDetector_3_8
+#define CharsetMatch CharsetMatch_3_8
+#define CharsetRecog_2022 CharsetRecog_2022_3_8
+#define CharsetRecog_2022CN CharsetRecog_2022CN_3_8
+#define CharsetRecog_2022JP CharsetRecog_2022JP_3_8
+#define CharsetRecog_2022KR CharsetRecog_2022KR_3_8
+#define CharsetRecog_8859_1 CharsetRecog_8859_1_3_8
+#define CharsetRecog_8859_1_da CharsetRecog_8859_1_da_3_8
+#define CharsetRecog_8859_1_de CharsetRecog_8859_1_de_3_8
+#define CharsetRecog_8859_1_en CharsetRecog_8859_1_en_3_8
+#define CharsetRecog_8859_1_es CharsetRecog_8859_1_es_3_8
+#define CharsetRecog_8859_1_fr CharsetRecog_8859_1_fr_3_8
+#define CharsetRecog_8859_1_it CharsetRecog_8859_1_it_3_8
+#define CharsetRecog_8859_1_nl CharsetRecog_8859_1_nl_3_8
+#define CharsetRecog_8859_1_no CharsetRecog_8859_1_no_3_8
+#define CharsetRecog_8859_1_pt CharsetRecog_8859_1_pt_3_8
+#define CharsetRecog_8859_1_sv CharsetRecog_8859_1_sv_3_8
+#define CharsetRecog_8859_2 CharsetRecog_8859_2_3_8
+#define CharsetRecog_8859_2_cs CharsetRecog_8859_2_cs_3_8
+#define CharsetRecog_8859_2_hu CharsetRecog_8859_2_hu_3_8
+#define CharsetRecog_8859_2_pl CharsetRecog_8859_2_pl_3_8
+#define CharsetRecog_8859_2_ro CharsetRecog_8859_2_ro_3_8
+#define CharsetRecog_8859_5 CharsetRecog_8859_5_3_8
+#define CharsetRecog_8859_5_ru CharsetRecog_8859_5_ru_3_8
+#define CharsetRecog_8859_6 CharsetRecog_8859_6_3_8
+#define CharsetRecog_8859_6_ar CharsetRecog_8859_6_ar_3_8
+#define CharsetRecog_8859_7 CharsetRecog_8859_7_3_8
+#define CharsetRecog_8859_7_el CharsetRecog_8859_7_el_3_8
+#define CharsetRecog_8859_8 CharsetRecog_8859_8_3_8
+#define CharsetRecog_8859_8_I_he CharsetRecog_8859_8_I_he_3_8
+#define CharsetRecog_8859_8_he CharsetRecog_8859_8_he_3_8
+#define CharsetRecog_8859_9 CharsetRecog_8859_9_3_8
+#define CharsetRecog_8859_9_tr CharsetRecog_8859_9_tr_3_8
+#define CharsetRecog_KOI8_R CharsetRecog_KOI8_R_3_8
+#define CharsetRecog_UTF8 CharsetRecog_UTF8_3_8
+#define CharsetRecog_UTF_16_BE CharsetRecog_UTF_16_BE_3_8
+#define CharsetRecog_UTF_16_LE CharsetRecog_UTF_16_LE_3_8
+#define CharsetRecog_UTF_32 CharsetRecog_UTF_32_3_8
+#define CharsetRecog_UTF_32_BE CharsetRecog_UTF_32_BE_3_8
+#define CharsetRecog_UTF_32_LE CharsetRecog_UTF_32_LE_3_8
+#define CharsetRecog_Unicode CharsetRecog_Unicode_3_8
+#define CharsetRecog_big5 CharsetRecog_big5_3_8
+#define CharsetRecog_euc CharsetRecog_euc_3_8
+#define CharsetRecog_euc_jp CharsetRecog_euc_jp_3_8
+#define CharsetRecog_euc_kr CharsetRecog_euc_kr_3_8
+#define CharsetRecog_gb_18030 CharsetRecog_gb_18030_3_8
+#define CharsetRecog_mbcs CharsetRecog_mbcs_3_8
+#define CharsetRecog_sbcs CharsetRecog_sbcs_3_8
+#define CharsetRecog_sjis CharsetRecog_sjis_3_8
+#define CharsetRecog_windows_1251 CharsetRecog_windows_1251_3_8
+#define CharsetRecog_windows_1256 CharsetRecog_windows_1256_3_8
+#define CharsetRecognizer CharsetRecognizer_3_8
+#define ChoiceFormat ChoiceFormat_3_8
+#define ClassDefFormat1Table ClassDefFormat1Table_3_8
+#define ClassDefFormat2Table ClassDefFormat2Table_3_8
+#define ClassDefinitionTable ClassDefinitionTable_3_8
+#define CollationElementIterator CollationElementIterator_3_8
+#define CollationKey CollationKey_3_8
+#define CollationLocaleListEnumeration CollationLocaleListEnumeration_3_8
+#define Collator Collator_3_8
+#define CollatorFactory CollatorFactory_3_8
+#define CompactTrieDictionary CompactTrieDictionary_3_8
+#define CompactTrieEnumeration CompactTrieEnumeration_3_8
+#define CompoundTransliterator CompoundTransliterator_3_8
+#define ContextualGlyphSubstitutionProcessor ContextualGlyphSubstitutionProcessor_3_8
+#define ContextualSubstitutionBase ContextualSubstitutionBase_3_8
+#define ContextualSubstitutionFormat1Subtable ContextualSubstitutionFormat1Subtable_3_8
+#define ContextualSubstitutionFormat2Subtable ContextualSubstitutionFormat2Subtable_3_8
+#define ContextualSubstitutionFormat3Subtable ContextualSubstitutionFormat3Subtable_3_8
+#define ContextualSubstitutionSubtable ContextualSubstitutionSubtable_3_8
+#define CoverageFormat1Table CoverageFormat1Table_3_8
+#define CoverageFormat2Table CoverageFormat2Table_3_8
+#define CoverageTable CoverageTable_3_8
+#define CurrencyAmount CurrencyAmount_3_8
+#define CurrencyFormat CurrencyFormat_3_8
+#define CurrencyUnit CurrencyUnit_3_8
+#define CursiveAttachmentSubtable CursiveAttachmentSubtable_3_8
+#define DTRedundantEnumeration DTRedundantEnumeration_3_8
+#define DTSkeletonEnumeration DTSkeletonEnumeration_3_8
+#define DateFormat DateFormat_3_8
+#define DateFormatSymbols DateFormatSymbols_3_8
+#define DateTimeMatcher DateTimeMatcher_3_8
+#define DateTimePatternGenerator DateTimePatternGenerator_3_8
+#define DateTimeRule DateTimeRule_3_8
+#define DecimalFormat DecimalFormat_3_8
+#define DecimalFormatSymbols DecimalFormatSymbols_3_8
+#define DefaultCalendarFactory DefaultCalendarFactory_3_8
+#define DefaultCharMapper DefaultCharMapper_3_8
+#define DeviceTable DeviceTable_3_8
+#define DictionaryBreakEngine DictionaryBreakEngine_3_8
+#define DigitList DigitList_3_8
+#define DistanceInfo DistanceInfo_3_8
+#define Entry Entry_3_8
+#define EnumToOffset EnumToOffset_3_8
+#define EscapeTransliterator EscapeTransliterator_3_8
+#define EventListener EventListener_3_8
+#define ExtensionSubtable ExtensionSubtable_3_8
+#define FeatureListTable FeatureListTable_3_8
+#define FieldPosition FieldPosition_3_8
+#define FontRuns FontRuns_3_8
+#define Format Format_3_8
+#define Format1AnchorTable Format1AnchorTable_3_8
+#define Format2AnchorTable Format2AnchorTable_3_8
+#define Format3AnchorTable Format3AnchorTable_3_8
+#define FormatParser FormatParser_3_8
+#define Formattable Formattable_3_8
+#define ForwardCharacterIterator ForwardCharacterIterator_3_8
+#define FractionalPartSubstitution FractionalPartSubstitution_3_8
+#define FunctionReplacer FunctionReplacer_3_8
+#define GDEFMarkFilter GDEFMarkFilter_3_8
+#define GXLayoutEngine GXLayoutEngine_3_8
+#define GlyphDefinitionTableHeader GlyphDefinitionTableHeader_3_8
+#define GlyphIterator GlyphIterator_3_8
+#define GlyphLookupTableHeader GlyphLookupTableHeader_3_8
+#define GlyphPositionAdjustments GlyphPositionAdjustments_3_8
+#define GlyphPositioningLookupProcessor GlyphPositioningLookupProcessor_3_8
+#define GlyphPositioningTableHeader GlyphPositioningTableHeader_3_8
+#define GlyphSubstitutionLookupProcessor GlyphSubstitutionLookupProcessor_3_8
+#define GlyphSubstitutionTableHeader GlyphSubstitutionTableHeader_3_8
+#define Grego Grego_3_8
+#define GregorianCalendar GregorianCalendar_3_8
+#define HanOpenTypeLayoutEngine HanOpenTypeLayoutEngine_3_8
+#define HangulOpenTypeLayoutEngine HangulOpenTypeLayoutEngine_3_8
+#define HebrewCalendar HebrewCalendar_3_8
+#define ICUBreakIteratorFactory ICUBreakIteratorFactory_3_8
+#define ICUBreakIteratorService ICUBreakIteratorService_3_8
+#define ICUCollatorFactory ICUCollatorFactory_3_8
+#define ICUCollatorService ICUCollatorService_3_8
+#define ICULanguageBreakFactory ICULanguageBreakFactory_3_8
+#define ICULocaleService ICULocaleService_3_8
+#define ICUNotifier ICUNotifier_3_8
+#define ICUNumberFormatFactory ICUNumberFormatFactory_3_8
+#define ICUNumberFormatService ICUNumberFormatService_3_8
+#define ICUResourceBundleFactory ICUResourceBundleFactory_3_8
+#define ICUService ICUService_3_8
+#define ICUServiceFactory ICUServiceFactory_3_8
+#define ICUServiceKey ICUServiceKey_3_8
+#define ICU_Utility ICU_Utility_3_8
+#define IndianCalendar IndianCalendar_3_8
+#define IndicClassTable IndicClassTable_3_8
+#define IndicOpenTypeLayoutEngine IndicOpenTypeLayoutEngine_3_8
+#define IndicRearrangementProcessor IndicRearrangementProcessor_3_8
+#define IndicReordering IndicReordering_3_8
+#define InitialTimeZoneRule InitialTimeZoneRule_3_8
+#define InputText InputText_3_8
+#define IntegralPartSubstitution IntegralPartSubstitution_3_8
+#define IslamicCalendar IslamicCalendar_3_8
+#define IteratedChar IteratedChar_3_8
+#define JapaneseCalendar JapaneseCalendar_3_8
+#define KernTable KernTable_3_8
+#define KeywordEnumeration KeywordEnumeration_3_8
+#define KhmerClassTable KhmerClassTable_3_8
+#define KhmerOpenTypeLayoutEngine KhmerOpenTypeLayoutEngine_3_8
+#define KhmerReordering KhmerReordering_3_8
+#define LECharMapper LECharMapper_3_8
+#define LEFontInstance LEFontInstance_3_8
+#define LEGlyphFilter LEGlyphFilter_3_8
+#define LEGlyphStorage LEGlyphStorage_3_8
+#define LEInsertionCallback LEInsertionCallback_3_8
+#define LEInsertionList LEInsertionList_3_8
+#define LXUtilities LXUtilities_3_8
+#define LanguageBreakEngine LanguageBreakEngine_3_8
+#define LanguageBreakFactory LanguageBreakFactory_3_8
+#define LayoutEngine LayoutEngine_3_8
+#define LigatureSubstitutionProcessor LigatureSubstitutionProcessor_3_8
+#define LigatureSubstitutionSubtable LigatureSubstitutionSubtable_3_8
+#define LocDataParser LocDataParser_3_8
+#define Locale Locale_3_8
+#define LocaleBased LocaleBased_3_8
+#define LocaleKey LocaleKey_3_8
+#define LocaleKeyFactory LocaleKeyFactory_3_8
+#define LocaleRuns LocaleRuns_3_8
+#define LocaleUtility LocaleUtility_3_8
+#define LocalizationInfo LocalizationInfo_3_8
+#define LookupListTable LookupListTable_3_8
+#define LookupProcessor LookupProcessor_3_8
+#define LookupSubtable LookupSubtable_3_8
+#define LookupTable LookupTable_3_8
+#define LowercaseTransliterator LowercaseTransliterator_3_8
+#define MPreFixups MPreFixups_3_8
+#define MarkArray MarkArray_3_8
+#define MarkToBasePositioningSubtable MarkToBasePositioningSubtable_3_8
+#define MarkToLigaturePositioningSubtable MarkToLigaturePositioningSubtable_3_8
+#define MarkToMarkPositioningSubtable MarkToMarkPositioningSubtable_3_8
+#define Math Math_3_8
+#define Measure Measure_3_8
+#define MeasureFormat MeasureFormat_3_8
+#define MeasureUnit MeasureUnit_3_8
+#define MessageFormat MessageFormat_3_8
+#define MessageFormatAdapter MessageFormatAdapter_3_8
+#define ModulusSubstitution ModulusSubstitution_3_8
+#define MoonRiseSetCoordFunc MoonRiseSetCoordFunc_3_8
+#define MoonTimeAngleFunc MoonTimeAngleFunc_3_8
+#define MorphSubtableHeader MorphSubtableHeader_3_8
+#define MorphTableHeader MorphTableHeader_3_8
+#define MultipleSubstitutionSubtable MultipleSubstitutionSubtable_3_8
+#define MultiplierSubstitution MultiplierSubstitution_3_8
+#define MutableTrieDictionary MutableTrieDictionary_3_8
+#define MutableTrieEnumeration MutableTrieEnumeration_3_8
+#define NFFactory NFFactory_3_8
+#define NFRule NFRule_3_8
+#define NFRuleSet NFRuleSet_3_8
+#define NFSubstitution NFSubstitution_3_8
+#define NGramParser NGramParser_3_8
+#define NameToEnum NameToEnum_3_8
+#define NameUnicodeTransliterator NameUnicodeTransliterator_3_8
+#define NonContextualGlyphSubstitutionProcessor NonContextualGlyphSubstitutionProcessor_3_8
+#define NonContiguousEnumToOffset NonContiguousEnumToOffset_3_8
+#define NormalizationTransliterator NormalizationTransliterator_3_8
+#define Normalizer Normalizer_3_8
+#define NullSubstitution NullSubstitution_3_8
+#define NullTransliterator NullTransliterator_3_8
+#define NumberFormat NumberFormat_3_8
+#define NumberFormatFactory NumberFormatFactory_3_8
+#define NumeratorSubstitution NumeratorSubstitution_3_8
+#define OlsonTimeZone OlsonTimeZone_3_8
+#define OpenTypeLayoutEngine OpenTypeLayoutEngine_3_8
+#define OpenTypeUtilities OpenTypeUtilities_3_8
+#define PairPositioningFormat1Subtable PairPositioningFormat1Subtable_3_8
+#define PairPositioningFormat2Subtable PairPositioningFormat2Subtable_3_8
+#define PairPositioningSubtable PairPositioningSubtable_3_8
+#define ParagraphLayout ParagraphLayout_3_8
+#define ParseData ParseData_3_8
+#define ParsePosition ParsePosition_3_8
+#define PatternMap PatternMap_3_8
+#define PatternMapIterator PatternMapIterator_3_8
+#define PersianCalendar PersianCalendar_3_8
+#define PropertyAliases PropertyAliases_3_8
+#define PtnElem PtnElem_3_8
+#define PtnSkeleton PtnSkeleton_3_8
+#define Quantifier Quantifier_3_8
+#define RBBIDataWrapper RBBIDataWrapper_3_8
+#define RBBINode RBBINode_3_8
+#define RBBIRuleBuilder RBBIRuleBuilder_3_8
+#define RBBIRuleScanner RBBIRuleScanner_3_8
+#define RBBISetBuilder RBBISetBuilder_3_8
+#define RBBIStateDescriptor RBBIStateDescriptor_3_8
+#define RBBISymbolTable RBBISymbolTable_3_8
+#define RBBISymbolTableEntry RBBISymbolTableEntry_3_8
+#define RBBITableBuilder RBBITableBuilder_3_8
+#define RangeDescriptor RangeDescriptor_3_8
+#define RegexCompile RegexCompile_3_8
+#define RegexMatcher RegexMatcher_3_8
+#define RegexPattern RegexPattern_3_8
+#define RegexStaticSets RegexStaticSets_3_8
+#define RelativeDateFormat RelativeDateFormat_3_8
+#define RemoveTransliterator RemoveTransliterator_3_8
+#define Replaceable Replaceable_3_8
+#define ReplaceableGlue ReplaceableGlue_3_8
+#define ResourceBundle ResourceBundle_3_8
+#define RiseSetCoordFunc RiseSetCoordFunc_3_8
+#define RuleBasedBreakIterator RuleBasedBreakIterator_3_8
+#define RuleBasedCollator RuleBasedCollator_3_8
+#define RuleBasedNumberFormat RuleBasedNumberFormat_3_8
+#define RuleBasedTimeZone RuleBasedTimeZone_3_8
+#define RuleBasedTransliterator RuleBasedTransliterator_3_8
+#define RuleCharacterIterator RuleCharacterIterator_3_8
+#define RuleHalf RuleHalf_3_8
+#define RunArray RunArray_3_8
+#define SameValueSubstitution SameValueSubstitution_3_8
+#define ScriptListTable ScriptListTable_3_8
+#define ScriptRunIterator ScriptRunIterator_3_8
+#define ScriptTable ScriptTable_3_8
+#define SearchIterator SearchIterator_3_8
+#define SegmentArrayProcessor SegmentArrayProcessor_3_8
+#define SegmentSingleProcessor SegmentSingleProcessor_3_8
+#define ServiceEnumeration ServiceEnumeration_3_8
+#define ServiceListener ServiceListener_3_8
+#define SimpleArrayProcessor SimpleArrayProcessor_3_8
+#define SimpleDateFormat SimpleDateFormat_3_8
+#define SimpleFactory SimpleFactory_3_8
+#define SimpleLocaleKeyFactory SimpleLocaleKeyFactory_3_8
+#define SimpleNumberFormatFactory SimpleNumberFormatFactory_3_8
+#define SimpleTimeZone SimpleTimeZone_3_8
+#define SinglePositioningFormat1Subtable SinglePositioningFormat1Subtable_3_8
+#define SinglePositioningFormat2Subtable SinglePositioningFormat2Subtable_3_8
+#define SinglePositioningSubtable SinglePositioningSubtable_3_8
+#define SingleSubstitutionFormat1Subtable SingleSubstitutionFormat1Subtable_3_8
+#define SingleSubstitutionFormat2Subtable SingleSubstitutionFormat2Subtable_3_8
+#define SingleSubstitutionSubtable SingleSubstitutionSubtable_3_8
+#define SingleTableProcessor SingleTableProcessor_3_8
+#define Spec Spec_3_8
+#define StateTableProcessor StateTableProcessor_3_8
+#define StringCharacterIterator StringCharacterIterator_3_8
+#define StringEnumeration StringEnumeration_3_8
+#define StringLocalizationInfo StringLocalizationInfo_3_8
+#define StringMatcher StringMatcher_3_8
+#define StringPair StringPair_3_8
+#define StringReplacer StringReplacer_3_8
+#define StringSearch StringSearch_3_8
+#define StyleRuns StyleRuns_3_8
+#define SubstitutionLookup SubstitutionLookup_3_8
+#define SubtableProcessor SubtableProcessor_3_8
+#define SunTimeAngleFunc SunTimeAngleFunc_3_8
+#define SymbolTable SymbolTable_3_8
+#define TZEnumeration TZEnumeration_3_8
+#define TaiwanCalendar TaiwanCalendar_3_8
+#define TernaryNode TernaryNode_3_8
+#define ThaiBreakEngine ThaiBreakEngine_3_8
+#define ThaiLayoutEngine ThaiLayoutEngine_3_8
+#define ThaiShaping ThaiShaping_3_8
+#define TibetanClassTable TibetanClassTable_3_8
+#define TibetanOpenTypeLayoutEngine TibetanOpenTypeLayoutEngine_3_8
+#define TibetanReordering TibetanReordering_3_8
+#define TimeArrayTimeZoneRule TimeArrayTimeZoneRule_3_8
+#define TimeZone TimeZone_3_8
+#define TimeZoneKeysEnumeration TimeZoneKeysEnumeration_3_8
+#define TimeZoneRule TimeZoneRule_3_8
+#define TimeZoneTransition TimeZoneTransition_3_8
+#define TitlecaseTransliterator TitlecaseTransliterator_3_8
+#define TransliterationRule TransliterationRule_3_8
+#define TransliterationRuleData TransliterationRuleData_3_8
+#define TransliterationRuleSet TransliterationRuleSet_3_8
+#define Transliterator Transliterator_3_8
+#define TransliteratorAlias TransliteratorAlias_3_8
+#define TransliteratorIDParser TransliteratorIDParser_3_8
+#define TransliteratorParser TransliteratorParser_3_8
+#define TransliteratorRegistry TransliteratorRegistry_3_8
+#define TrieWordDictionary TrieWordDictionary_3_8
+#define TrimmedArrayProcessor TrimmedArrayProcessor_3_8
+#define UCharCharacterIterator UCharCharacterIterator_3_8
+#define ULocRuns ULocRuns_3_8
+#define UMemory UMemory_3_8
+#define UObject UObject_3_8
+#define URegularExpression URegularExpression_3_8
+#define UStack UStack_3_8
+#define UStringEnumeration UStringEnumeration_3_8
+#define UVector UVector_3_8
+#define UVector32 UVector32_3_8
+#define UnescapeTransliterator UnescapeTransliterator_3_8
+#define UnhandledEngine UnhandledEngine_3_8
+#define UnicodeArabicOpenTypeLayoutEngine UnicodeArabicOpenTypeLayoutEngine_3_8
+#define UnicodeFilter UnicodeFilter_3_8
+#define UnicodeFunctor UnicodeFunctor_3_8
+#define UnicodeMatcher UnicodeMatcher_3_8
+#define UnicodeNameTransliterator UnicodeNameTransliterator_3_8
+#define UnicodeReplacer UnicodeReplacer_3_8
+#define UnicodeSet UnicodeSet_3_8
+#define UnicodeSetIterator UnicodeSetIterator_3_8
+#define UnicodeSetStringSpan UnicodeSetStringSpan_3_8
+#define UnicodeString UnicodeString_3_8
+#define UppercaseTransliterator UppercaseTransliterator_3_8
+#define VTZReader VTZReader_3_8
+#define VTZWriter VTZWriter_3_8
+#define VTimeZone VTimeZone_3_8
+#define ValueRecord ValueRecord_3_8
+#define ValueRuns ValueRuns_3_8
+#define locale_set_default_internal locale_set_default_internal_3_8
+#define util64_fromDouble util64_fromDouble_3_8
+#define util64_pow util64_pow_3_8
+#define util64_tou util64_tou_3_8
+
+#endif /* !U_HAVE_NAMESPACE */
+#endif /* XP_CPLUSPLUS */
+
+#endif
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/urep.h b/jni/EastAsianWidth/unicode/urep.h
new file mode 100644
index 0000000..57b547c
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/urep.h
@@ -0,0 +1,155 @@
+/*
+******************************************************************************
+* Copyright (C) 1997-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 06/23/00 aliu Creation.
+******************************************************************************
+*/
+
+#ifndef __UREP_H
+#define __UREP_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ * TODO
+ * Add usage scenario
+ * Add test code
+ * Talk about pinning
+ * Talk about "can truncate result if out of memory"
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+/**
+ * \file
+ * \brief C API: Callbacks for UReplaceable
+ */
+/**
+ * An opaque replaceable text object. This will be manipulated only
+ * through the caller-supplied UReplaceableCallbacks struct. Related
+ * to the C++ class Replaceable.
+ * This is currently only used in the Transliterator C API, see utrans.h.
+ * @stable ICU 2.0
+ */
+typedef void* UReplaceable;
+
+/**
+ * A set of function pointers that transliterators use to manipulate a
+ * UReplaceable. The caller should supply the required functions to
+ * manipulate their text appropriately. Related to the C++ class
+ * Replaceable.
+ * @stable ICU 2.0
+ */
+typedef struct UReplaceableCallbacks {
+
+ /**
+ * Function pointer that returns the number of UChar code units in
+ * this text.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @return The length of the text.
+ * @stable ICU 2.0
+ */
+ int32_t (*length)(const UReplaceable* rep);
+
+ /**
+ * Function pointer that returns the UChar code unit at the given
+ * offset into this text; 0 <= offset < n, where n is the value
+ * returned by (*length)(rep). See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar (code unit).
+ * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar (*charAt)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that returns the UChar32 code point at the given
+ * offset into this text. See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar32 (code point).
+ * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar32 (*char32At)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that replaces text between start and limit in
+ * this text with the given text. Attributes (out of band info)
+ * should be retained.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be replaced,
+ * inclusive.
+ * @param limit the ending index of the text to be replaced,
+ * exclusive.
+ * @param text the new text to replace the UChars from
+ * start..limit-1.
+ * @param textLength the number of UChars at text, or -1 if text
+ * is null-terminated.
+ * @stable ICU 2.0
+ */
+ void (*replace)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ const UChar* text,
+ int32_t textLength);
+
+ /**
+ * Function pointer that copies the characters in the range
+ * [start, limit) into the array dst.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start offset of first character which will be copied
+ * into the array
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param dst array in which to copy characters. The length of
+ * dst must be at least (limit - start).
+ * @stable ICU 2.1
+ */
+ void (*extract)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ UChar* dst);
+
+ /**
+ * Function pointer that copies text between start and limit in
+ * this text to another index in the text. Attributes (out of
+ * band info) should be retained. After this call, there will be
+ * (at least) two copies of the characters originally located at
+ * start..limit-1.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be copied,
+ * inclusive.
+ * @param limit the ending index of the text to be copied,
+ * exclusive.
+ * @param dest the index at which the copy of the UChars should be
+ * inserted.
+ * @stable ICU 2.0
+ */
+ void (*copy)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ int32_t dest);
+
+} UReplaceableCallbacks;
+
+U_CDECL_END
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ures.h b/jni/EastAsianWidth/unicode/ures.h
new file mode 100644
index 0000000..9cc2e89
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ures.h
@@ -0,0 +1,871 @@
+/*
+**********************************************************************
+* Copyright (C) 1997-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File URES.H (formerly CRESBUND.H)
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 02/22/99 damiba overhaul.
+* 04/04/99 helena Fixed internal header inclusion.
+* 04/15/99 Madhu Updated Javadoc
+* 06/14/99 stephen Removed functions taking a filename suffix.
+* 07/20/99 stephen Language-independent typedef to void*
+* 11/09/99 weiv Added ures_getLocale()
+* 06/24/02 weiv Added support for resource sharing
+******************************************************************************
+*/
+
+#ifndef URES_H
+#define URES_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+/**
+ * \file
+ * \brief C API: Resource Bundle
+ *
+ * C API: Resource Bundle
+ *
+ * C API representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale-specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
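+ * locale and then ask it for individual resources.
+ */
+
+/**
+ * Opens a UResourceBundle for the given package and locale. The bundle should
+ * be released with ures_close() when it is no longer needed.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)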
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code.
+ * The UErrorCode err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational status results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data or root locale data was used; neither the requested locale
+ * nor any of its fall back locales could be found. Please see the users guide for more
+ * information on this topic.
+ * @return a newly allocated resource bundle.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_open(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+
+/** This function does not care what kind of localeID is passed in. It simply opens a bundle with
+ * that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
+ * an %%ALIAS directive, the results are undefined.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
+ * @return a newly allocated resource bundle or NULL if it doesn't exist.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_openDirect(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+/**
+ * Same as ures_open() but takes a const UChar *path.
+ * This path will be converted to char * using the default converter,
+ * then ures_open() is called.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ * @param status fills in the outgoing error code.
+ * @return a newly allocated resource bundle.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_openU(const UChar* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+/**
+ * Returns the number of strings/arrays in resource bundles.
+ * Better to use ures_getSize, as this function will be deprecated.
+ *
+ *@param resourceBundle resource bundle containing the desired strings
+ *@param resourceKey key tagging the resource
+ *@param err fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ *@return: for Arrays: returns the number of resources in the array
+ * Tables: returns the number of resources in the table
+ * single string: returns 1
+ *@see ures_getSize
+ * @deprecated ICU 2.8 Use ures_getSize instead
+ */
+U_DEPRECATED int32_t U_EXPORT2
+ures_countArrayItems(const UResourceBundle* resourceBundle,
+ const char* resourceKey,
+ UErrorCode* err);
+/**
+ * Close a resource bundle, all pointers returned from the various ures_getXXX calls
+ * on this particular bundle should be considered invalid henceforth.
+ *
+ * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ures_close(UResourceBundle* resourceBundle);
+
+/**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use ures_getVersion as this function is going to be deprecated.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see ures_getVersion
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getVersionNumber(const UResourceBundle* resourceBundle);
+
+/**
+ * Return the version number associated with this ResourceBundle as an
+ * UVersionInfo array.
+ *
+ * @param resB The resource bundle for which the version is checked.
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ures_getVersion(const UResourceBundle* resB,
+ UVersionInfo versionInfo);
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle. This API allows
+ * you to query for the real locale of the resource. For example, if you requested
+ * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
+ * For subresources, the locale where this resource comes from will be returned.
+ * If fallback has occurred, getLocale will reflect this.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getLocale(const UResourceBundle* resourceBundle,
+ UErrorCode* status);
+
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle.
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+ures_getLocaleByType(const UResourceBundle* resourceBundle,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+
+/**
+ * Same as ures_open() but uses the fill-in parameter instead of allocating
+ * a bundle, if r!=NULL.
+ * TODO need to revisit usefulness of this function
+ * and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
+ * @param r The resourcebundle to open
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by udata_open( packageName, "res", locale, err)
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param localeID specifies the locale for which we want to open the resource
+ * @param status The error code
+ * (Declared void: nothing is returned; the bundle is opened into the fill-in parameter r.)
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+ures_openFillIn(UResourceBundle *r,
+ const char* packageName,
+ const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Returns a string from a string resource type
+ *
+ * @param resourceBundle a string resource
+ * @param len fills in the length of resulting string
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2
+ures_getString(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a UTF-8 string from a string resource.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param length Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If FALSE, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getString
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8String(const UResourceBundle *resB,
+ char *dest, int32_t *length,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns a binary data from a binary resource.
+ *
+ * @param resourceBundle a binary resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @see ures_getString
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const uint8_t* U_EXPORT2
+ures_getBinary(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a 32 bit integer array from a resource.
+ *
+ * @param resourceBundle an int vector resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a pointer to a chunk of 32 bit integers which live in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getString
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const int32_t* U_EXPORT2
+ures_getIntVector(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param resourceBundle an integer resource
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return an integer value
+ * @see ures_getInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2
+ures_getUInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param resourceBundle an integer resource
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return an integer value
+ * @see ures_getUInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ures_getInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns the size of a resource. Size for scalar types is always 1,
+ * and for vector/table types is the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ * @param resourceBundle a resource
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ures_getSize(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @param resourceBundle a resource
+ * @return type of the given resource.
+ * @see UResType
+ * @stable ICU 2.0
+ */
+U_STABLE UResType U_EXPORT2
+ures_getType(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the key associated with a given resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @param resourceBundle a resource
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ures_getKey(const UResourceBundle *resourceBundle);
+
+/* ITERATION API
+ This API provides means for iterating through a resource
+*/
+
+/**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @param resourceBundle a resource
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ures_resetIterator(UResourceBundle *resourceBundle);
+
+/**
+ * Checks whether the given resource has another element to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @return TRUE if there are more elements, FALSE if there are no more elements
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ures_hasNext(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the next resource in a given resource or NULL if there are no more resources
+ * to iterate over. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. You may still get a non NULL result even if an
+ * error occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_getNextResource(UResourceBundle *resourceBundle,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the next string in a given resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @param len fill in length of the string
+ * @param key fill in for key associated with this string. NULL if no key
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2
+ures_getNextString(UResourceBundle *resourceBundle,
+ int32_t* len,
+ const char ** key,
+ UErrorCode *status);
+
+/**
+ * Returns the resource in a given resource at the specified index. Features a fill-in parameter.
+ *
+ * @param resourceBundle the resource bundle from which to get a sub-resource
+ * @param indexR an index to the wanted resource.
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
+ * occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_getByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexR,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param resourceBundle a resource
+ * @param indexS an index to the wanted string.
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2
+ures_getStringByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexS,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource at the specified index.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param index An index to the wanted string.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If FALSE, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByIndex
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8StringByIndex(const UResourceBundle *resB,
+ int32_t index,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns a resource in a given resource that has a given key. This procedure works only with table
+ * resources. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param key a key associated with the wanted resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2
+ures_getByKey(const UResourceBundle *resourceBundle,
+ const char* key,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns a string in a given resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2
+ures_getStringByKey(const UResourceBundle *resB,
+ const char* key,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource and a key.
+ * This function works only with table resources.
+ *
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param key A key associated with the wanted resource
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If FALSE, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByKey
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8StringByKey(const UResourceBundle *resB,
+ const char *key,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+#ifdef XP_CPLUSPLUS
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * Returns the string held by a string resource as a UnicodeString.
+ * C++ convenience wrapper around ures_getString().
+ *
+ * @param resB a resource
+ * @param status fills in the outgoing error code
+ * could be U_MISSING_RESOURCE_ERROR if the key is not found
+ * could be a non-failing error
+ * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING
+ * @return a UnicodeString object. On success it is set from the buffer
+ * returned by ures_getString(). If there is an error (or status
+ * is NULL), the string is explicitly set to bogus.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeString(const UResourceBundle *resB,
+                      UErrorCode* status)
+{
+    UnicodeString result;
+    int32_t length = 0;
+    const UChar *text = ures_getString(resB, &length, status);
+    if (status != NULL && U_SUCCESS(*status)) {
+        /* Same arguments the UnicodeString(TRUE, r, len) constructor received before. */
+        result.setTo(TRUE, text, length);
+    } else {
+        /* Make the documented error contract explicit instead of relying on */
+        /* how UnicodeString treats a NULL text pointer. */
+        result.setToBogus();
+    }
+    return result;
+}
+
+/**
+ * Returns the next string in a resource as a UnicodeString.
+ * C++ convenience wrapper around ures_getNextString(). Because the result
+ * is returned by value it can never be NULL; check status (or isBogus())
+ * to detect that there was nothing more to return.
+ *
+ * @param resB a resource
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error (or status is NULL),
+ * the string is explicitly set to bogus.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getNextUnicodeString(UResourceBundle *resB,
+                          const char ** key,
+                          UErrorCode* status)
+{
+    UnicodeString result;
+    int32_t length = 0;
+    const UChar *text = ures_getNextString(resB, &length, key, status);
+    if (status != NULL && U_SUCCESS(*status)) {
+        /* Same arguments the UnicodeString(TRUE, r, len) constructor received before. */
+        result.setTo(TRUE, text, length);
+    } else {
+        /* Signal failure explicitly rather than through a NULL text pointer. */
+        result.setToBogus();
+    }
+    return result;
+}
+
+/**
+ * Returns the string in a given resource at the specified index as a
+ * UnicodeString. C++ convenience wrapper around ures_getStringByIndex().
+ *
+ * @param resB a resource
+ * @param indexS an index to the wanted string.
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error (or status is NULL),
+ * the string is explicitly set to bogus.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByIndex(const UResourceBundle *resB,
+                             int32_t indexS,
+                             UErrorCode* status)
+{
+    UnicodeString result;
+    int32_t length = 0;
+    const UChar *text = ures_getStringByIndex(resB, indexS, &length, status);
+    if (status != NULL && U_SUCCESS(*status)) {
+        /* Same arguments the UnicodeString(TRUE, r, len) constructor received before. */
+        result.setTo(TRUE, text, length);
+    } else {
+        /* Make the documented error contract explicit. */
+        result.setToBogus();
+    }
+    return result;
+}
+
+/**
+ * Returns a string in a resource that has a given key, as a UnicodeString.
+ * This procedure works only with table resources. C++ convenience wrapper
+ * around ures_getStringByKey().
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error (or status is NULL),
+ * the string is explicitly set to bogus.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByKey(const UResourceBundle *resB,
+                           const char* key,
+                           UErrorCode* status)
+{
+    UnicodeString result;
+    int32_t length = 0;
+    const UChar *text = ures_getStringByKey(resB, key, &length, status);
+    if (status != NULL && U_SUCCESS(*status)) {
+        /* Same arguments the UnicodeString(TRUE, r, len) constructor received before. */
+        result.setTo(TRUE, text, length);
+    } else {
+        /* Make the documented error contract explicit. */
+        result.setToBogus();
+    }
+    return result;
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Create a string enumerator, owned by the caller, of all locales located within
+ * the specified resource tree.
+ * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or "ICUDATA-coll"
+ * This call is similar to uloc_getAvailable().
+ * @param status error code
+ * @stable ICU 3.2
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ures_openAvailableLocales(const char *packageName, UErrorCode *status);
+
+
+#endif /*_URES*/
+/*eof*/
diff --git a/jni/EastAsianWidth/unicode/uscript.h b/jni/EastAsianWidth/unicode/uscript.h
new file mode 100644
index 0000000..6aebc68
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/uscript.h
@@ -0,0 +1,238 @@
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2007, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * File USCRIPT.H
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 07/06/2001 Ram Creation.
+ ******************************************************************************
+ */
+
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Script Information
+ */
+
+/**
+ * Constants for ISO 15924 script codes.
+ *
+ * Many of these script codes - those from Unicode's ScriptNames.txt -
+ * are character property values for Unicode's Script property.
+ * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
+ *
+ * Starting with ICU 3.6, constants for most ISO 15924 script codes
+ * are included (currently excluding private-use codes Qaaa..Qabx).
+ * For scripts for which there are codes in ISO 15924 but which are not
+ * used in the Unicode Character Database (UCD), there are no Unicode characters
+ * associated with those scripts.
+ *
+ * For example, there are no characters that have a UCD script code of
+ * Hans or Hant. All Han ideographs have the Hani script code.
+ * The Hans and Hant script codes are used with CLDR data.
+ *
+ * ISO 15924 script codes are included for use with CLDR and similar.
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+ USCRIPT_INVALID_CODE = -1, /* sentinel below the valid range, which starts at 0 */
+ USCRIPT_COMMON = 0 , /* Zyyy */
+ USCRIPT_INHERITED = 1, /* Qaai */
+ USCRIPT_ARABIC = 2, /* Arab */
+ USCRIPT_ARMENIAN = 3, /* Armn */
+ USCRIPT_BENGALI = 4, /* Beng */
+ USCRIPT_BOPOMOFO = 5, /* Bopo */
+ USCRIPT_CHEROKEE = 6, /* Cher */
+ USCRIPT_COPTIC = 7, /* Copt */
+ USCRIPT_CYRILLIC = 8, /* Cyrl */
+ USCRIPT_DESERET = 9, /* Dsrt */
+ USCRIPT_DEVANAGARI = 10, /* Deva */
+ USCRIPT_ETHIOPIC = 11, /* Ethi */
+ USCRIPT_GEORGIAN = 12, /* Geor */
+ USCRIPT_GOTHIC = 13, /* Goth */
+ USCRIPT_GREEK = 14, /* Grek */
+ USCRIPT_GUJARATI = 15, /* Gujr */
+ USCRIPT_GURMUKHI = 16, /* Guru */
+ USCRIPT_HAN = 17, /* Hani */
+ USCRIPT_HANGUL = 18, /* Hang */
+ USCRIPT_HEBREW = 19, /* Hebr */
+ USCRIPT_HIRAGANA = 20, /* Hira */
+ USCRIPT_KANNADA = 21, /* Knda */
+ USCRIPT_KATAKANA = 22, /* Kana */
+ USCRIPT_KHMER = 23, /* Khmr */
+ USCRIPT_LAO = 24, /* Laoo */
+ USCRIPT_LATIN = 25, /* Latn */
+ USCRIPT_MALAYALAM = 26, /* Mlym */
+ USCRIPT_MONGOLIAN = 27, /* Mong */
+ USCRIPT_MYANMAR = 28, /* Mymr */
+ USCRIPT_OGHAM = 29, /* Ogam */
+ USCRIPT_OLD_ITALIC = 30, /* Ital */
+ USCRIPT_ORIYA = 31, /* Orya */
+ USCRIPT_RUNIC = 32, /* Runr */
+ USCRIPT_SINHALA = 33, /* Sinh */
+ USCRIPT_SYRIAC = 34, /* Syrc */
+ USCRIPT_TAMIL = 35, /* Taml */
+ USCRIPT_TELUGU = 36, /* Telu */
+ USCRIPT_THAANA = 37, /* Thaa */
+ USCRIPT_THAI = 38, /* Thai */
+ USCRIPT_TIBETAN = 39, /* Tibt */
+ /** Canadian_Aboriginal script. @stable ICU 2.6 */
+ USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
+ /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
+ USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
+ USCRIPT_YI = 41, /* Yiii */
+ USCRIPT_TAGALOG = 42, /* Tglg */
+ USCRIPT_HANUNOO = 43, /* Hano */
+ USCRIPT_BUHID = 44, /* Buhd */
+ USCRIPT_TAGBANWA = 45, /* Tagb */
+
+ /* New scripts in Unicode 4 @stable ICU 2.6 */
+ USCRIPT_BRAILLE = 46, /* Brai */
+ USCRIPT_CYPRIOT = 47, /* Cprt */
+ USCRIPT_LIMBU = 48, /* Limb */
+ USCRIPT_LINEAR_B = 49, /* Linb */
+ USCRIPT_OSMANYA = 50, /* Osma */
+ USCRIPT_SHAVIAN = 51, /* Shaw */
+ USCRIPT_TAI_LE = 52, /* Tale */
+ USCRIPT_UGARITIC = 53, /* Ugar */
+
+ /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
+ USCRIPT_KATAKANA_OR_HIRAGANA = 54,/* Hrkt */
+
+ /* New scripts in Unicode 4.1 @stable ICU 3.4 */
+ USCRIPT_BUGINESE = 55, /* Bugi */
+ USCRIPT_GLAGOLITIC = 56, /* Glag */
+ USCRIPT_KHAROSHTHI = 57, /* Khar */
+ USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
+ USCRIPT_NEW_TAI_LUE = 59, /* Talu */
+ USCRIPT_TIFINAGH = 60, /* Tfng */
+ USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
+
+ /* New script codes from ISO 15924 @stable ICU 3.6 */
+ USCRIPT_BALINESE = 62, /* Bali */
+ USCRIPT_BATAK = 63, /* Batk */
+ USCRIPT_BLISSYMBOLS = 64, /* Blis */
+ USCRIPT_BRAHMI = 65, /* Brah */
+ USCRIPT_CHAM = 66, /* Cham */
+ USCRIPT_CIRTH = 67, /* Cirt */
+ USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
+ USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
+ USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
+ USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
+ USCRIPT_KHUTSURI = 72, /* Geok */
+ USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
+ USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
+ USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
+ USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
+ USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
+ USCRIPT_JAVANESE = 78, /* Java */
+ USCRIPT_KAYAH_LI = 79, /* Kali */
+ USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
+ USCRIPT_LATIN_GAELIC = 81, /* Latg */
+ USCRIPT_LEPCHA = 82, /* Lepc */
+ USCRIPT_LINEAR_A = 83, /* Lina */
+ USCRIPT_MANDAEAN = 84, /* Mand */
+ USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
+ USCRIPT_MEROITIC = 86, /* Mero */
+ USCRIPT_NKO = 87, /* Nkoo */
+ USCRIPT_ORKHON = 88, /* Orkh */
+ USCRIPT_OLD_PERMIC = 89, /* Perm */
+ USCRIPT_PHAGS_PA = 90, /* Phag */
+ USCRIPT_PHOENICIAN = 91, /* Phnx */
+ USCRIPT_PHONETIC_POLLARD = 92, /* Plrd */
+ USCRIPT_RONGORONGO = 93, /* Roro */
+ USCRIPT_SARATI = 94, /* Sara */
+ USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
+ USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
+ USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
+ USCRIPT_TENGWAR = 98, /* Teng */
+ USCRIPT_VAI = 99, /* Vaii */
+ USCRIPT_VISIBLE_SPEECH = 100, /* Visp */
+ USCRIPT_CUNEIFORM = 101,/* Xsux */
+ USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
+ USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
+
+ /* New script codes from ISO 15924 @draft ICU 3.8 */
+ USCRIPT_CARIAN = 104,/* Cari */
+ USCRIPT_JAPANESE = 105,/* Jpan */
+ USCRIPT_LANNA = 106,/* Lana */
+ USCRIPT_LYCIAN = 107,/* Lyci */
+ USCRIPT_LYDIAN = 108,/* Lydi */
+ USCRIPT_OL_CHIKI = 109,/* Olck */
+ USCRIPT_REJANG = 110,/* Rjng */
+ USCRIPT_SAURASHTRA = 111,/* Saur */
+ USCRIPT_SIGN_WRITING = 112,/* Sgnw */
+ USCRIPT_SUNDANESE = 113,/* Sund */
+ USCRIPT_MOON = 114,/* Moon */
+ USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
+
+ /* Private use codes from Qaaa - Qabx are not supported */
+ USCRIPT_CODE_LIMIT = 116 /* one more than the highest script code defined above (115) */
+} UScriptCode;
+
+/**
+ * Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
+ * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
+ * If required capacity is greater than capacity of the destination buffer then the error code
+ * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
+ *
+ * start - end.
+ * If start > end then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_set(USet* set,
+ UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * Empties the set passed before applying the pattern.
+ * A frozen set will not be modified.
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+ UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely. See PropertyAliases.txt for names and a
+ * description of loose matching. If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID. Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NUL-terminated
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely. See PropertyValueAliases.txt for names
+ * and a description of loose matching. In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NUL-terminated
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+ const UChar *prop, int32_t propLength,
+ const UChar *value, int32_t valueLength,
+ UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NUL-terminated
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+ int32_t pos);
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * characters to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the union of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet. After this call,
+ * uset_containsRange(set, start, end) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the range
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the string
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet. After this call,
+ * uset_contains(set, c) will return FALSE for every c in start..end.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the range
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string from the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the string
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the asymmetric set difference of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If start > end then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the intersection of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this object's internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the compact
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This operation does not affect
+ * the multicharacter strings, if any.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Returns TRUE if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns TRUE if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns TRUE if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return TRUE if set contains the range
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns TRUE if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * charAt().
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range, return (UChar32)-1. The inverse of this method is
+ * indexOf().
+ * @param set the set
+ * @param index an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_STABLE UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t index);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This does not check containment of grapheme
+ * clusters, like uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @draft ICU 3.8
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has the following format (each line is one 16-bit integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param pErrorCode pointer to the error code. Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns TRUE if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set. Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+ UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/usetiter.h b/jni/EastAsianWidth/unicode/usetiter.h
new file mode 100644
index 0000000..defa75c
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/usetiter.h
@@ -0,0 +1,318 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef USETITER_H
+#define USETITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ *
+ * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
+ * iterates over either code points or code point ranges. After all
+ * code points or ranges have been returned, it returns the
+ * multicharacter strings of the UnicodeSet, if any.
+ *
+ * This class is not intended to be subclassed. Consider any fields
+ * or methods declared as "protected" to be private. The use of
+ * protected in this class is an artifact of history.
+ *
+ *
+ * UnicodeSetIterator it(set);
+ * while (it.next()) {
+ * processItem(it.getString());
+ * }
+ *
+ *
+ * UnicodeSetIterator it(set);
+ * while (it.nextRange()) {
+ * if (it.isString()) {
+ * processString(it.getString());
+ * } else {
+ * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
+ * }
+ * }
+ *
+ * @author M. Davis
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeSetIterator : public UObject {
+
+ protected:
+
+ /**
+ * Value of codepoint if the iterator points to a string.
+ * If codepoint == IS_STRING, then examine
+ * string for the current iteration result.
+ * @stable ICU 2.4
+ */
+ enum { IS_STRING = -1 };
+
+ /**
+ * Current code point, or the special value IS_STRING, if
+ * the iterator points to a string.
+ * @stable ICU 2.4
+ */
+ UChar32 codepoint;
+
+ /**
+ * When iterating over ranges using nextRange(),
+ * codepointEnd contains the inclusive end of the
+ * iteration range, if codepoint != IS_STRING. If
+ * iterating over code points using next(), or if
+ * codepoint == IS_STRING, then the value of
+ * codepointEnd is undefined.
+ * @stable ICU 2.4
+ */
+ UChar32 codepointEnd;
+
+ /**
+ * If codepoint == IS_STRING, then string points
+ * to the current string. If codepoint != IS_STRING, the
+ * value of string is undefined.
+ * @stable ICU 2.4
+ */
+ const UnicodeString* string;
+
+ public:
+
+ /**
+ * Create an iterator over the given set. The iterator is valid
+ * only so long as set is valid.
+ * @param set set to iterate over
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator(const UnicodeSet& set);
+
+ /**
+ * Create an iterator over nothing. next() and
+ * nextRange() return false. This is a convenience
+ * constructor allowing the target to be set later.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator();
+
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeSetIterator();
+
+ /**
+ * Returns true if the current element is a string. If so, the
+ * caller can retrieve it with getString(). If this
+ * method returns false, the current element is a code point or
+ * code point range, depending on whether next() or
+ * nextRange() was called.
+ * Elements of types string and codepoint can both be retrieved
+ * with the function getString().
+ * Elements of type codepoint can also be retrieved with
+ * getCodepoint().
+ * For ranges, getCodepoint() returns the starting codepoint
+ * of the range, and getCodepointEnd() returns the end
+ * of the range.
+ * @stable ICU 2.4
+ */
+ inline UBool isString() const;
+
+ /**
+ * Returns the current code point, if isString() returned
+ * false. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepoint() const;
+
+ /**
+ * Returns the end of the current code point range, if
+ * isString() returned false and nextRange() was
+ * called. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepointEnd() const;
+
+ /**
+ * Returns the current string, if isString() returned
+ * true. If the current iteration item is a code point, a UnicodeString
+ * containing that single code point is returned.
+ *
+ * Ownership of the returned string remains with the iterator.
+ * The string is guaranteed to remain valid only until the iterator is
+ * advanced to the next item, or until the iterator is deleted.
+ *
+ * @stable ICU 2.4
+ */
+ const UnicodeString& getString();
+
+ /**
+ * Advances the iteration position to the next element in the set,
+ * which can be either a single code point or a string.
+ * If there are no more elements in the set, return false.
+ *
+ * U_SHAPE_... options.source.
+ *
+ * @param dest The destination buffer that will receive the results of the
+ * requested operations. It may be NULL only if
+ * destSize is 0. The source and destination must not
+ * overlap.
+ *
+ * @param destSize The size (capacity) of the destination buffer in UChars.
+ * If destSize is 0, then no output is produced,
+ * but the necessary buffer size is returned ("preflighting").
+ *
+ * @param options This is a 32-bit set of flags that specify the operations
+ * that are performed on the input text. If no error occurs,
+ * then the result will always be written to the destination
+ * buffer.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return The number of UChars written to the destination buffer.
+ * If an error occurred, then no output was written, or it may be
+ * incomplete. If U_BUFFER_OVERFLOW_ERROR is set, then
+ * the return value indicates the necessary destination buffer size.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+ UChar *dest, int32_t destSize,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_GROW_SHRINK 0
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
+
+/** Bit mask for memory options. @stable ICU 2.0 */
+#define U_SHAPE_LENGTH_MASK 3
+
+
+/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual LTR order,
+ * the leftmost displayed character stored first.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
+
+/** Bit mask for direction indicators. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_MASK 4
+
+
+/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_NOOP 0
+
+/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_SHAPE 8
+
+/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_UNSHAPE 0x10
+
+/**
+ * Letter shaping option: replace abstract letter characters by "shaped" ones.
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
+ * are always "shaped" into the isolated form instead of the medial form
+ * (selecting code points from the Arabic Presentation Forms-B block).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
+
+/** Bit mask for letter shaping options. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_MASK 0x18
+
+
+/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_NOOP 0
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_EN2AN 0x20
+
+/**
+ * Digit shaping option:
+ * Replace Arabic-Indic digits by European digits (U+0030...).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_AN2EN 0x40
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be not an Arabic letter
+ * (it is U_LEFT_TO_RIGHT [L] or U_RIGHT_TO_LEFT [R]).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be an Arabic letter.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_RESERVED 0xa0
+
+/** Bit mask for digit shaping options. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_MASK 0xe0
+
+
+/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN 0
+
+/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
+
+/** Bit mask for digit type options. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_MASK 0x3f00
+
+/**
+ * Tashkeel aggregation option:
+ * Replaces any combination of U+0651 with one of
+ * U+064C, U+064D, U+064E, U+064F, U+0650 with
+ * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000
+/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0
+/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000
+
+/**
+ * Presentation form option:
+ * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
+ * characters with U+06xx characters, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
+/** Presentation form option:
+ * Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
+ * their unshaped correspondents in range U+06xx, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
+/** Bit mask for preserve presentation form. @stable ICU 3.6 */
+#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/usprep.h b/jni/EastAsianWidth/unicode/usprep.h
new file mode 100644
index 0000000..c7e75a5
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/usprep.h
@@ -0,0 +1,156 @@
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: usprep.h
+ * encoding: US-ASCII
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003jul2
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __USPREP_H__
+#define __USPREP_H__
+
+/**
+ * \file
+ * \brief C API: Implements the StringPrep algorithm.
+ */
+
+#include "unicode/utypes.h"
+/**
+ *
+ * StringPrep API implements the StringPrep framework as described by RFC 3454.
+ * StringPrep prepares Unicode strings for use in network protocols.
+ * Profiles of StringPrep are sets of rules and data according to which the
+ * Unicode Strings are prepared. Each profile contains tables which describe
+ * how a code point should be treated. The tables are broadly classified into
+ *
+ *
+ *
+ * The procedure for preparing Unicode strings:
+ *
+ *
+ * @author Ram Viswanadha
+ */
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ * The StringPrep profile
+ * @stable ICU 2.8
+ */
+typedef struct UStringPrepProfile UStringPrepProfile;
+
+
+/**
+ * Option to prohibit processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_DEFAULT 0x0000
+
+/**
+ * Option to allow processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_ALLOW_UNASSIGNED 0x0001
+
+
+/**
+ * Creates a StringPrep profile from the data file.
+ *
+ * @param path string containing the full path pointing to the directory
+ * where the profile reside followed by the package name
+ * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
+ * if NULL, ICU default data files will be used.
+ * @param fileName name of the profile file to be opened
+ * @param status ICU error code in/out parameter. Must not be NULL.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 2.8
+ */
+U_STABLE UStringPrepProfile* U_EXPORT2
+usprep_open(const char* path,
+ const char* fileName,
+ UErrorCode* status);
+
+
+/**
+ * Closes the profile
+ * @param profile The profile to close
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+usprep_close(UStringPrepProfile* profile);
+
+
+/**
+ * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
+ * checks for prohibited and BiDi characters in the order defined by RFC 3454
+ * depending on the options specified in the profile.
+ *
+ * @param prep The profile to use
+ * @param src Pointer to UChar buffer containing the string to prepare
+ * @param srcLength Number of characters in the source string
+ * @param dest Pointer to the destination buffer to receive the output
+ * @param destCapacity The capacity of destination array
+ * @param options A bit set of options:
+ *
+ * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
+ *
+ * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
+ * as normal Unicode code points.
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The number of UChars in the destination buffer
+ * @stable ICU 2.8
+ */
+
+U_STABLE int32_t U_EXPORT2
+usprep_prepare( const UStringPrepProfile* prep,
+ const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status );
+
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/jni/EastAsianWidth/unicode/ustring.h b/jni/EastAsianWidth/unicode/ustring.h
new file mode 100644
index 0000000..f6a7947
--- /dev/null
+++ b/jni/EastAsianWidth/unicode/ustring.h
@@ -0,0 +1,1478 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ typedef void UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C chars, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ * code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings. Appends a copy of src,
+ * including the null terminator, to dst. The initial copied
+ * character from src overwrites the null terminator in dst.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src);
+
+/**
+ * Concatenate two ustrings.
+ * Appends at most n characters from src to dst.
+ * Adds a terminating NUL.
+ * If src is too long, then only n-1 characters will be copied
+ * before the terminating NUL.
+ * If n<=0 then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to append.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of substring in s,
+ * or s itself if the substring is empty,
+ * or NULL if substring is not in s.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of substring in s,
+ * or s itself if the substring is empty,
+ * or NULL if substring is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of substring in s,
+ * or s itself if the substring is empty,
+ * or NULL if substring is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of substring in s,
+ * or s itself if the substring is empty,
+ * or NULL if substring is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string string of any of the characters
+ * in the string matchSet.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return A pointer to the character in string that matches one of the
+ * characters in matchSet, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in string,
+ * beginning with the first, that do not occur somewhere in matchSet.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in string that do not
+ * occur in matchSet.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in string,
+ * beginning with the first, that occur somewhere in matchSet.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in string that do
+ * occur in matchSet.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to strtok_r(),
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ * After the first call to u_strtok_r(), this argument must
+ * be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ * which is set by this function. The saveState
+ * parameter should be the address of a local variable of type
+ * UChar *. (i.e. define "UChar *myLocalSaveState" and use
+ * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ * when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if s1 and s2 are bitwise equal; a negative
+ * value if s1 is bitwise less than s2; a positive
+ * value if s1 is bitwise greater than s2.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This function works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (FALSE)
+ * and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ * u_strCompare(u_strFoldCase(s1, options),
+ * u_strFoldCase(s2, options),
+ * (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This function works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality.
+ * Compares at most n characters.
+ *
+ * @param ucs1 A string to compare.
+ * @param ucs2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return 0 if ucs1 and ucs2 are bitwise equal; a negative
+ * value if ucs1 is bitwise less than ucs2; a positive
+ * value if ucs1 is bitwise greater than ucs2.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmp(const UChar *ucs1,
+ const UChar *ucs2,
+ int32_t n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
+ * u_strFoldCase(s2, length, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most n characters. The result will be null terminated
+ * if the length of src is less than n.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+ const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most n characters. The result will be null terminated
+ * if the length of src is less than n.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+ const char *src,
+ int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
+ const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most n characters. The result will be null terminated
+ * if the length of src is less than n.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to dst.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
+ const UChar *src,
+ int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to copy
+ * @return A pointer to dest
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to move
+ * @return A pointer to dest
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize count characters of dest to c.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to dest.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first count UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ * When buf1 == buf2, 0 is returned.
+ * When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains count UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains count UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains count UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains count UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of c in s
+ * or NULL if c is not in s.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically initialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * NUL, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ *
+ * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ * static UBool didInit=FALSE;
+ *
+ * int32_t function() {
+ * if(!didInit) {
+ * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ * didInit=TRUE;
+ * }
+ * return u_strcmp(ustringVar1, ustringVar2);
+ * }
+ *
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16) /* static initializer available: U_STRING_INIT expands to nothing */
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#else /* fallback: declare writable storage; U_STRING_INIT copies (length)+1 units, same size as the array */
+# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer. The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\x{h...} 1-8 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc. Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it. The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator. May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid. On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest. Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_unescape(const char *src,
+ UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer. The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset the offset (passed by value) of the source-text character to return
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character of the source text at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash. This method takes a callback
+ * pointer to a function that returns the UChar at a given offset. By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence. On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text. The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (NULL), then a standard titlecase
+ * break iterator is opened.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-fold the characters in a string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Converts a sequence of UChars to wchar_t units.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Converts a sequence of wchar_t units to UChars.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const wchar_t *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ * the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ * be well-formed UTF-16.
+ * The function will resynchronize to valid code point boundaries
+ * within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then it must be destCapacity>=srcLength.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * Unlike for other ICU functions, if srcLength>=0 then it
+ * must be destCapacity>=srcLength.
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * Unlike for other ICU functions, if srcLength>=0 but
+ * destCapacity < srcLength, then *pDestLength will be set to srcLength
+ * (and U_BUFFER_OVERFLOW_ERROR will be set)
+ * regardless of the actual result length.
+ *
+ * NOTE(review): text appears to have been lost at this point when the patch
+ * was extracted (the original line read "destCapacitydelta code points").
+ * The sentence above was completed from the upstream ICU ustring.h
+ * documentation and should be confirmed against it. The rest of the
+ * u_strFromUTF8Lenient() documentation, its declaration, and the opening of
+ * the utext_moveIndex32() documentation are missing; what follows is the
+ * tail of the utext_moveIndex32() description.
+ *
+ * The index is moved by delta code points
+ * forward or backward, but no further backward than to 0 and
+ * no further forward than to utext_nativeLength().
+ * The resulting index value will be in between 0 and length, inclusive.
+ *
+ * @param ut the text to be accessed.
+ * @param delta the signed number of code points to move the iteration position.
+ * @return TRUE if the position could be moved the requested number of positions while
+ * staying within the range [0 - text length].
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta);
+
+/**
+ * Get the native index of the character preceding the current position.
+ * If the iteration position is already at the start of the text, zero
+ * is returned.
+ * The value returned is the same as that obtained from the following sequence,
+ * but without the side effect of changing the iteration position.
+ *
+ * \code
+ * UText *ut = whatever;
+ * ...
+ * utext_previous(ut);
+ * utext_getNativeIndex(ut);
+ * \endcode
+ *
+ * This function is most useful during forwards iteration, where it will get the
+ * native index of the character most recently returned from utext_next().
+ *
+ * @param ut the text to be accessed
+ * @return the native index of the character preceding the current index position,
+ * or zero if the current position is at the start of the text.
+ * @stable ICU 3.6
+ */
+U_STABLE int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut);
+
+
+/**
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * while(i
+ *
+ * Obsolete part of pre-ICU 2.4 utf.h file documentation:
+ *
+ *
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.
+ *
+ * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * This typedef is only declared when U_USE_UTF_DEPRECATES is defined.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif /* U_USE_UTF_DEPRECATES */
+
+/**
+ * Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=FALSE.
+ * Accordingly UTF_SAFE is defined here, while UTF_UNSAFE and UTF_STRICT are
+ * explicitly undefined immediately below.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/**
+ * Explicitly undefined: the ..._UNSAFE macro implementations are not the default choice.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#undef UTF_UNSAFE
+/**
+ * Explicitly undefined: the default choice uses strict=FALSE.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#undef UTF_STRICT
+
+/**
+ *
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \code
+ * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ *
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)