All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.text.CollationParsedRuleBuilder Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
/**
 *******************************************************************************
 * Copyright (C) 1996-2011, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.ibm.icu.impl.IntTrieBuilder;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
import com.ibm.icu.impl.TrieBuilder;
import com.ibm.icu.impl.TrieIterator;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.VersionInfo;

/**
 * Class for building a collator from a list of collation rules. This class is
 * uses CollationRuleParser
 * 
 * @author Syn Wee Quek
 * @since release 2.2, June 11 2002
 */
final class CollationParsedRuleBuilder {
    // package private constructors ------------------------------------------

    /**
     * Constructor
     * 
     * @param rules
     *            collation rules
     * @exception ParseException
     *                thrown when argument rules have an invalid syntax
     */
    CollationParsedRuleBuilder(String rules) throws ParseException {
        m_parser_ = new CollationRuleParser(rules);
        m_parser_.assembleTokenList();
        m_utilColEIter_ = RuleBasedCollator.UCA_
                .getCollationElementIterator("");
    }

    // package private inner classes -----------------------------------------

    /**
     * Inverse UCA wrapper
     */
    static class InverseUCA {
        // package private constructor ---------------------------------------

        InverseUCA() {
        }

        // package private data member ---------------------------------------

        /**
         * Array list of characters
         */
        int m_table_[];
        /**
         * Array list of continuation characters
         */
        char m_continuations_[];

        /**
         * UCA version of inverse UCA table
         */
        VersionInfo m_UCA_version_;

        // package private method --------------------------------------------

        /**
         * Returns the previous inverse ces of the argument ces
         * 
         * @param ce
         *            ce to test
         * @param contce
         *            continuation ce to test
         * @param strength
         *            collation strength
         * @param prevresult
         *            an array to store the return results previous inverse ce
         *            and previous inverse continuation ce
         * @return result of the inverse ce
         */
        final int getInversePrevCE(int ce, int contce, int strength,
                int prevresult[]) {
            int result = findInverseCE(ce, contce);

            if (result < 0) {
                prevresult[0] = CollationElementIterator.NULLORDER;
                return -1;
            }

            ce &= STRENGTH_MASK_[strength];
            contce &= STRENGTH_MASK_[strength];

            prevresult[0] = ce;
            prevresult[1] = contce;

            while ((prevresult[0] & STRENGTH_MASK_[strength]) == ce
                    && (prevresult[1] & STRENGTH_MASK_[strength]) == contce
                    && result > 0) {
                // this condition should prevent falling off the edge of the
                // world
                // here, we end up in a singularity - zero
                prevresult[0] = m_table_[3 * (--result)];
                prevresult[1] = m_table_[3 * result + 1];
            }
            return result;
        }

        final int getCEStrengthDifference(int CE, int contCE, int prevCE,
                int prevContCE) {
            int strength = Collator.TERTIARY;
            while (((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength]))
                    && (strength != 0)) {
                strength--;
            }
            return strength;
        }

        private int compareCEs(int source0, int source1, int target0,
                int target1) {
            int s1 = source0, s2, t1 = target0, t2;
            if (RuleBasedCollator.isContinuation(source1)) {
                s2 = source1;
            } else {
                s2 = 0;
            }
            if (RuleBasedCollator.isContinuation(target1)) {
                t2 = target1;
            } else {
                t2 = 0;
            }

            int s = 0, t = 0;
            if (s1 == t1 && s2 == t2) {
                return 0;
            }
            s = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >>> 16);
            t = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >>> 16);
            if (s == t) {
                s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00) >> 8;
                t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00) >> 8;
                if (s == t) {
                    s = (s1 & 0x000000FF) << 8 | (s2 & 0x000000FF);
                    t = (t1 & 0x000000FF) << 8 | (t2 & 0x000000FF);
                    return Utility.compareUnsigned(s, t);
                } else {
                    return Utility.compareUnsigned(s, t);
                }
            } else {
                return Utility.compareUnsigned(s, t);
            }
        }

        /**
         * Finding the inverse CE of the argument CEs
         * 
         * @param ce
         *            CE to be tested
         * @param contce
         *            continuation CE
         * @return inverse CE
         */
        int findInverseCE(int ce, int contce) {
            int bottom = 0;
            int top = m_table_.length / 3;
            int result = 0;

            while (bottom < top - 1) {
                result = (top + bottom) >> 1;
                int first = m_table_[3 * result];
                int second = m_table_[3 * result + 1];
                int comparison = compareCEs(first, second, ce, contce);
                if (comparison > 0) {
                    top = result;
                } else if (comparison < 0) {
                    bottom = result;
                } else {
                    break;
                }
            }

            return result;
        }

        /**
         * Getting gap offsets in the inverse UCA
         * 
         * @param listheader
         *            parsed token lists
         * @exception Exception
         *                thrown when error occurs while finding the collation
         *                gaps
         */
        void getInverseGapPositions(
                CollationRuleParser.TokenListHeader listheader)
                throws Exception {
            // reset all the gaps
            CollationRuleParser.Token token = listheader.m_first_;
            int tokenstrength = token.m_strength_;

            for (int i = 0; i < 3; i++) {
                listheader.m_gapsHi_[3 * i] = 0;
                listheader.m_gapsHi_[3 * i + 1] = 0;
                listheader.m_gapsHi_[3 * i + 2] = 0;
                listheader.m_gapsLo_[3 * i] = 0;
                listheader.m_gapsLo_[3 * i + 1] = 0;
                listheader.m_gapsLo_[3 * i + 2] = 0;
                listheader.m_numStr_[i] = 0;
                listheader.m_fStrToken_[i] = null;
                listheader.m_lStrToken_[i] = null;
                listheader.m_pos_[i] = -1;
            }

            if ((listheader.m_baseCE_ >>> 24) >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_
                    && (listheader.m_baseCE_ >>> 24) <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) {
                // implicits -
                listheader.m_pos_[0] = 0;
                int t1 = listheader.m_baseCE_;
                int t2 = listheader.m_baseContCE_;
                listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
                listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
                listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
                int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_
                        | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16;
                primaryCE = RuleBasedCollator.impCEGen_
                        .getImplicitFromRaw(RuleBasedCollator.impCEGen_
                                .getRawFromImplicit(primaryCE) + 1);

                t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
                t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_
                        | RuleBasedCollator.CE_CONTINUATION_MARKER_;

                // if (listheader.m_baseCE_ < 0xEF000000) {
                // // first implicits have three byte primaries, with a gap of
                // // one so we esentially need to add 2 to the top byte in
                // // listheader.m_baseContCE_
                // t2 += 0x02000000;
                // }
                // else {
                // // second implicits have four byte primaries, with a gap of
                // // IMPLICIT_LAST2_MULTIPLIER_
                // // Now, this guy is not really accessible here, so until we
                // // find a better way to pass it around, assume that the gap
                // is 1
                // t2 += 0x00020000;
                // }
                listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
                listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
                listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
            } else if (listheader.m_indirect_ == true
                    && listheader.m_nextCE_ != 0) {
                listheader.m_pos_[0] = 0;
                int t1 = listheader.m_baseCE_;
                int t2 = listheader.m_baseContCE_;
                listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
                listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
                listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
                t1 = listheader.m_nextCE_;
                t2 = listheader.m_nextContCE_;
                listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
                listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
                listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
            } else {
                while (true) {
                    if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
                        listheader.m_pos_[tokenstrength] = getInverseNext(
                                listheader, tokenstrength);
                        if (listheader.m_pos_[tokenstrength] >= 0) {
                            listheader.m_fStrToken_[tokenstrength] = token;
                        } else {
                            // The CE must be implicit, since it's not in the
                            // table
                            // Error
                            throw new Exception("Internal program error");
                        }
                    }

                    while (token != null && token.m_strength_ >= tokenstrength) {
                        if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
                            listheader.m_lStrToken_[tokenstrength] = token;
                        }
                        token = token.m_next_;
                    }
                    if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) {
                        // check if previous interval is the same and merge the
                        // intervals if it is so
                        if (listheader.m_pos_[tokenstrength] == listheader.m_pos_[tokenstrength + 1]) {
                            listheader.m_fStrToken_[tokenstrength] = listheader.m_fStrToken_[tokenstrength + 1];
                            listheader.m_fStrToken_[tokenstrength + 1] = null;
                            listheader.m_lStrToken_[tokenstrength + 1] = null;
                            listheader.m_pos_[tokenstrength + 1] = -1;
                        }
                    }
                    if (token != null) {
                        tokenstrength = token.m_strength_;
                    } else {
                        break;
                    }
                }
                for (int st = 0; st < 3; st++) {
                    int pos = listheader.m_pos_[st];
                    if (pos >= 0) {
                        int t1 = m_table_[3 * pos];
                        int t2 = m_table_[3 * pos + 1];
                        listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2,
                                Collator.PRIMARY);
                        listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2,
                                Collator.SECONDARY);
                        listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24
                                | (t2 & 0x3f) << 16;
                        // pos --;
                        // t1 = m_table_[3 * pos];
                        // t2 = m_table_[3 * pos + 1];
                        t1 = listheader.m_baseCE_;
                        t2 = listheader.m_baseContCE_;

                        listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2,
                                Collator.PRIMARY);
                        listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2,
                                Collator.SECONDARY);
                        listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24
                                | (t2 & 0x3f) << 16;
                    }
                }
            }
        }

        /**
         * Gets the next CE in the inverse table
         * 
         * @param listheader
         *            token list header
         * @param strength
         *            collation strength
         * @return next ce
         */
        private final int getInverseNext(
                CollationRuleParser.TokenListHeader listheader, int strength) {
            int ce = listheader.m_baseCE_;
            int secondce = listheader.m_baseContCE_;
            int result = findInverseCE(ce, secondce);

            if (result < 0) {
                return -1;
            }

            ce &= STRENGTH_MASK_[strength];
            secondce &= STRENGTH_MASK_[strength];

            int nextce = ce;
            int nextcontce = secondce;

            while ((nextce & STRENGTH_MASK_[strength]) == ce
                    && (nextcontce & STRENGTH_MASK_[strength]) == secondce) {
                nextce = m_table_[3 * (++result)];
                nextcontce = m_table_[3 * result + 1];
            }

            listheader.m_nextCE_ = nextce;
            listheader.m_nextContCE_ = nextcontce;

            return result;
        }
    }

    // package private data members ------------------------------------------

    /**
     * Inverse UCA, instantiate only when required
     */
    static final InverseUCA INVERSE_UCA_;

    /**
     * UCA and Inverse UCA version do not match
     */
    private static final String INV_UCA_VERSION_MISMATCH_ = "UCA versions of UCA and inverse UCA should match";

    /**
     * UCA and Inverse UCA version do not match
     */
    private static final String UCA_NOT_INSTANTIATED_ = "UCA is not instantiated!";

    /**
     * Initializing the inverse UCA
     */
    static {
        InverseUCA temp = null;
        try {
            temp = CollatorReader.getInverseUCA();
        } catch (IOException e) {
        }
        /*
         * try { String invdat = "/com/ibm/icu/impl/data/invuca.icu";
         * InputStream i =
         * CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
         * BufferedInputStream b = new BufferedInputStream(i, 110000);
         * INVERSE_UCA_ = CollatorReader.readInverseUCA(b); b.close();
         * i.close(); } catch (Exception e) { e.printStackTrace(); throw new
         * RuntimeException(e.getMessage()); }
         */

        if (temp != null && RuleBasedCollator.UCA_ != null) {
            if (!temp.m_UCA_version_
                    .equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
                throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
            }
        } else {
            throw new RuntimeException(UCA_NOT_INSTANTIATED_);
        }

        INVERSE_UCA_ = temp;
    }

    // package private methods -----------------------------------------------

    /**
     * Parse and sets the collation rules in the argument collator
     * 
     * @param collator
     *            to set
     * @exception Exception
     *                thrown when internal program error occurs
     */
    void setRules(RuleBasedCollator collator) throws Exception {
        if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) {
            // we have a set of rules, let's make something of it
            assembleTailoringTable(collator);
        } else { // no rules, but no error either must be only options
            // We will init the collator from UCA
            collator.setWithUCATables();
        }
        // And set only the options
        m_parser_.setDefaultOptionsInCollator(collator);
    }

    private void copyRangeFromUCA(BuildTable t, int start, int end) {
        int u = 0;
        for (u = start; u <= end; u++) {
            // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND
            int CE = t.m_mapping_.getValue(u);
            if (CE == CE_NOT_FOUND_
            // this test is for contractions that are missing the starting
                    // element. Looks like latin-1 should be done before
                    // assembling the table, even if it results in more false
                    // closure elements
                    || (isContractionTableElement(CE) && getCE(
                            t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) {
                // m_utilElement_.m_uchars_ = str.toString();
                m_utilElement_.m_uchars_ = UCharacter.toString(u);
                m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
                m_utilElement_.m_prefix_ = 0;
                m_utilElement_.m_CELength_ = 0;
                m_utilElement_.m_prefixChars_ = null;
                m_utilColEIter_.setText(m_utilElement_.m_uchars_);
                while (CE != CollationElementIterator.NULLORDER) {
                    CE = m_utilColEIter_.next();
                    if (CE != CollationElementIterator.NULLORDER) {
                        m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
                    }
                }
                addAnElement(t, m_utilElement_);
            }
        }
    }

    /**
     * 2. Eliminate the negative lists by doing the following for each non-null
     * negative list: o if previousCE(baseCE, strongestN) != some ListHeader X's
     * baseCE, create new ListHeader X o reverse the list, add to the end of X's
     * positive list. Reset the strength of the first item you add, based on the
     * stronger strength levels of the two lists.
     * 
     * 3. For each ListHeader with a non-null positive list: o Find all
     * character strings with CEs between the baseCE and the next/previous CE,
     * at the strength of the first token. Add these to the tailoring. ? That
     * is, if UCA has ... x <<< X << x' <<< X' < y ..., and the tailoring has &
     * x < z... ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
     * 
     * It is possible that this part should be done even while constructing list
     * The problem is that it is unknown what is going to be the strongest
     * weight. So we might as well do it here o Allocate CEs for each token in
     * the list, based on the total number N of the largest level difference,
     * and the gap G between baseCE and nextCE at that level. The relation *
     * between the last item and nextCE is the same as the strongest strength. o
     * Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) ? There are 3
     * primary items: a, d, e. Fit them into the primary gap. Then fit b and c
     * into the secondary gap between a and d, then fit q into the tertiary gap
     * between b and c. o Example: baseCE << b <<< q << c * nextCE(X,2) ? There
     * are 2 secondary items: b, c. Fit them into the secondary gap. Then fit q
     * into the tertiary gap between b and c. o When incrementing primary
     * values, we will not cross high byte boundaries except where there is only
     * a single-byte primary. That is to ensure that the script reordering will
     * continue to work.
     * 
     * @param collator
     *            the rule based collator to update
     * @exception Exception
     *                thrown when internal program error occurs
     */
    void assembleTailoringTable(RuleBasedCollator collator) throws Exception {

        for (int i = 0; i < m_parser_.m_resultLength_; i++) {
            // now we need to generate the CEs
            // We stuff the initial value in the buffers, and increase the
            // appropriate buffer according to strength
            if (m_parser_.m_listHeader_[i].m_first_ != null) {
                // if there are any elements
                // due to the way parser works, subsequent tailorings
                // may remove all the elements from a sequence, therefore
                // leaving an empty tailoring sequence.
                initBuffers(m_parser_.m_listHeader_[i]);
            }
        }

        if (m_parser_.m_variableTop_ != null) {
            // stuff the variable top value
            m_parser_.m_options_.m_variableTopValue_ = m_parser_.m_variableTop_.m_CE_[0] >>> 16;
            // remove it from the list
            if (m_parser_.m_variableTop_.m_listHeader_.m_first_ == m_parser_.m_variableTop_) { // first
                                                                                               // in
                                                                                               // list
                m_parser_.m_variableTop_.m_listHeader_.m_first_ = m_parser_.m_variableTop_.m_next_;
            }
            if (m_parser_.m_variableTop_.m_listHeader_.m_last_ == m_parser_.m_variableTop_) {
                // first in list
                m_parser_.m_variableTop_.m_listHeader_.m_last_ = m_parser_.m_variableTop_.m_previous_;
            }
            if (m_parser_.m_variableTop_.m_next_ != null) {
                m_parser_.m_variableTop_.m_next_.m_previous_ = m_parser_.m_variableTop_.m_previous_;
            }
            if (m_parser_.m_variableTop_.m_previous_ != null) {
                m_parser_.m_variableTop_.m_previous_.m_next_ = m_parser_.m_variableTop_.m_next_;
            }
        }

        BuildTable t = new BuildTable(m_parser_);

        // After this, we have assigned CE values to all regular CEs now we
        // will go through list once more and resolve expansions, make
        // UCAElements structs and add them to table
        for (int i = 0; i < m_parser_.m_resultLength_; i++) {
            // now we need to generate the CEs
            // We stuff the initial value in the buffers, and increase the
            // appropriate buffer according to strength */
            createElements(t, m_parser_.m_listHeader_[i]);
        }

        m_utilElement_.clear();

        // add latin-1 stuff
        copyRangeFromUCA(t, 0, 0xFF);

        // add stuff for copying
        if (m_parser_.m_copySet_ != null) {
            int i = 0;
            for (i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) {
                copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i),
                        m_parser_.m_copySet_.getRangeEnd(i));
            }
        }

        // copy contractions from the UCA - this is felt mostly for cyrillic
        char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
        int maxUCAContractionLength = RuleBasedCollator.MAX_UCA_CONTRACTION_LENGTH;
        int offset = 0;
        while (conts[offset] != 0) {
            // A continuation is NUL-terminated and NUL-padded
            // except if it has the maximum length.
            int contractionLength = maxUCAContractionLength;
            while (contractionLength > 0 && conts[offset + contractionLength - 1] == 0) {
                --contractionLength;
            }
            int first = Character.codePointAt(conts, offset);
            int firstLength = Character.charCount(first);
            int tailoredCE = t.m_mapping_.getValue(first);
            Elements prefixElm = null;
            if (tailoredCE != CE_NOT_FOUND_) {
                boolean needToAdd = true;
                if (isContractionTableElement(tailoredCE)) {
                    if (isTailored(t.m_contractions_, tailoredCE, conts,
                            offset + firstLength) == true) {
                        needToAdd = false;
                    }
                }
                if (!needToAdd && isPrefix(tailoredCE)
                        && conts[offset + 1] == 0) {
                    // pre-context character in UCA
                    // The format for pre-context character is
                    // conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
                    Elements elm = new Elements();
                    elm.m_CELength_ = 0;
                    elm.m_uchars_ = Character.toString(conts[offset]);
                    elm.m_cPoints_ = m_utilElement_.m_uchars_;
                    elm.m_prefixChars_ = Character.toString(conts[offset + 2]);
                    elm.m_prefix_ = 0; // TODO(claireho) : confirm!
                    prefixElm = t.m_prefixLookup_.get(elm);
                    if ((prefixElm == null)
                            || (prefixElm.m_prefixChars_.charAt(0) != conts[offset + 2])) {
                        needToAdd = true;
                    }
                }
                if (m_parser_.m_removeSet_ != null
                        && m_parser_.m_removeSet_.contains(first)) {
                    needToAdd = false;
                }

                if (needToAdd == true) {
                    // we need to add if this contraction is not tailored.
                    if (conts[offset + 1] != 0) { // not precontext
                        m_utilElement_.m_prefix_ = 0;
                        m_utilElement_.m_prefixChars_ = null;
                        m_utilElement_.m_uchars_ = new String(conts, offset, contractionLength);
                        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
                        m_utilElement_.m_CELength_ = 0;
                        m_utilColEIter_.setText(m_utilElement_.m_uchars_);
                    } else { // add a pre-context element
                        int preKeyLen = 0;
                        m_utilElement_.m_uchars_ = Character.toString(conts[offset]);
                        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
                        m_utilElement_.m_CELength_ = 0;
                        m_utilElement_.m_prefixChars_ = Character.toString(conts[offset + 2]);
                        if (prefixElm == null) {
                            m_utilElement_.m_prefix_ = 0;
                        } else { // TODO (claireho): confirm!
                            m_utilElement_.m_prefix_ = m_utilElement_.m_prefix_;
                            // m_utilElement_.m_prefix_= prefixElm.m_prefix_;
                        }
                        m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
                        while (m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
                            // count number of keys for pre-context char.
                            preKeyLen++;
                        }
                        m_utilColEIter_.setText(m_utilElement_.m_prefixChars_ + m_utilElement_.m_uchars_);
                        // Skip the keys for prefix character, then copy the
                        // rest to el.
                        while ((preKeyLen-- > 0)
                                && m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
                            continue;
                        }

                    }
                    while (true) {
                        int CE = m_utilColEIter_.next();
                        if (CE != CollationElementIterator.NULLORDER) {
                            m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
                        } else {
                            break;
                        }
                    }
                    addAnElement(t, m_utilElement_);
                }
            } else if (m_parser_.m_removeSet_ != null
                    && m_parser_.m_removeSet_.contains(first)) {
                copyRangeFromUCA(t, first, first);
            }

            offset += maxUCAContractionLength;
        }

        // Add completely ignorable elements
        processUCACompleteIgnorables(t);

        // canonical closure
        canonicalClosure(t);

        // still need to produce compatibility closure
        assembleTable(t, collator);
    }

    // private inner classes -------------------------------------------------

    @SuppressWarnings("unused")
    private static class CEGenerator {
        // package private data members --------------------------------------

        WeightRange m_ranges_[];
        int m_rangesLength_;
        int m_byteSize_;
        int m_start_;
        int m_limit_;
        int m_maxCount_;
        int m_count_;
        int m_current_;
        int m_fLow_; // forbidden Low
        int m_fHigh_; // forbidden High

        // package private constructor ---------------------------------------

        CEGenerator() {
            m_ranges_ = new WeightRange[7];
            for (int i = 6; i >= 0; i--) {
                m_ranges_[i] = new WeightRange();
            }
        }
    }

    private static class WeightRange implements Comparable {
        // public methods ----------------------------------------------------

        /**
         * Compares this object with target
         * 
         * @param target object to compare with
         * @return 0 if equals, 1 if this is > target, -1 otherwise
         */
        public int compareTo(WeightRange target) {
            return Utility.compareUnsigned(m_start_, target.m_start_);
        }

        /**
         * Initialize
         */
        public void clear() {
            m_start_ = 0;
            m_end_ = 0;
            m_length_ = 0;
            m_count_ = 0;
            m_length2_ = 0;
            m_count2_ = 0;
        }

        // package private data members --------------------------------------

        int m_start_;
        int m_end_;
        int m_length_;
        int m_count_;
        int m_length2_;
        int m_count2_;

        // package private constructor ---------------------------------------

        WeightRange() {
            clear();
        }

        /**
         * Copy constructor. Cloneable is troublesome, needs to check for
         * exception
         * 
         * @param source
         *            to clone
         */
        WeightRange(WeightRange source) {
            m_start_ = source.m_start_;
            m_end_ = source.m_end_;
            m_length_ = source.m_length_;
            m_count_ = source.m_count_;
            m_length2_ = source.m_length2_;
            m_count2_ = source.m_count2_;
        }
    }

    private static class MaxJamoExpansionTable {
        // package private data members --------------------------------------

        List m_endExpansionCE_;
        // vector of booleans
        List m_isV_;
        byte m_maxLSize_;
        byte m_maxVSize_;
        byte m_maxTSize_;

        // package private constructor ---------------------------------------

        MaxJamoExpansionTable() {
            m_endExpansionCE_ = new ArrayList();
            m_isV_ = new ArrayList();
            m_endExpansionCE_.add(Integer.valueOf(0));
            m_isV_.add(Boolean.FALSE);
            m_maxLSize_ = 1;
            m_maxVSize_ = 1;
            m_maxTSize_ = 1;
        }

        MaxJamoExpansionTable(MaxJamoExpansionTable table) {
            m_endExpansionCE_ = new ArrayList(table.m_endExpansionCE_);
            m_isV_ = new ArrayList(table.m_isV_);
            m_maxLSize_ = table.m_maxLSize_;
            m_maxVSize_ = table.m_maxVSize_;
            m_maxTSize_ = table.m_maxTSize_;
        }
    }

    private static class MaxExpansionTable {
        // package private constructor --------------------------------------

        MaxExpansionTable() {
            m_endExpansionCE_ = new ArrayList();
            m_expansionCESize_ = new ArrayList();
            m_endExpansionCE_.add(Integer.valueOf(0));
            m_expansionCESize_.add(Byte.valueOf((byte) 0));
        }

        MaxExpansionTable(MaxExpansionTable table) {
            m_endExpansionCE_ = new ArrayList(table.m_endExpansionCE_);
            m_expansionCESize_ = new ArrayList(table.m_expansionCESize_);
        }

        // package private data member --------------------------------------

        List m_endExpansionCE_;
        List m_expansionCESize_;
    }

    private static class BasicContractionTable {
        // package private constructors -------------------------------------

        BasicContractionTable() {
            m_CEs_ = new ArrayList();
            m_codePoints_ = new StringBuilder();
        }

        // package private data members -------------------------------------

        StringBuilder m_codePoints_;
        List m_CEs_;
    }

    private static class ContractionTable {
        // package private constructor --------------------------------------

        /**
         * Builds a contraction table
         * 
         * @param mapping
         */
        ContractionTable(IntTrieBuilder mapping) {
            m_mapping_ = mapping;
            m_elements_ = new ArrayList();
            m_CEs_ = new ArrayList();
            m_codePoints_ = new StringBuilder();
            m_offsets_ = new ArrayList();
            m_currentTag_ = CE_NOT_FOUND_TAG_;
        }

        /**
         * Copies a contraction table. Not all data will be copied into their
         * own object.
         * 
         * @param table
         */
        ContractionTable(ContractionTable table) {
            m_mapping_ = table.m_mapping_;
            m_elements_ = new ArrayList(table.m_elements_);
            m_codePoints_ = new StringBuilder(table.m_codePoints_);
            m_CEs_ = new ArrayList(table.m_CEs_);
            m_offsets_ = new ArrayList(table.m_offsets_);
            m_currentTag_ = table.m_currentTag_;
        }

        // package private data members ------------------------------------

        /**
         * Vector of BasicContractionTable
         */
        List m_elements_;
        IntTrieBuilder m_mapping_;
        StringBuilder m_codePoints_;
        List m_CEs_;
        List m_offsets_;
        int m_currentTag_;
    }

    /**
     * Private class for combining mark table. The table is indexed by the class
     * value(0-255).
     */
    @SuppressWarnings("unused")
    private static class CombinClassTable {
        /**
         * accumulated numbers of combining marks.
         */
        int[] index = new int[256];

        /**
         * code point array for combining marks.
         */
        char[] cPoints;

        /**
         * size of cPoints.
         */
        int size;

        // constructor
        CombinClassTable() {
            cPoints = null;
            size = 0;
            pos = 0;
            curClass = 1;
        }

        /**
         * Copy the combining mark table from ccc and index in compact way.
         * 
         * @param cps
         *            : code point array
         * @param size
         *            : size of ccc
         * @param index
         *            : index of combining classes(0-255)
         */
        void generate(char[] cps, int numOfCM, int[] ccIndex) {
            int count = 0;

            cPoints = new char[numOfCM];
            for (int i = 0; i < 256; i++) {
                for (int j = 0; j < ccIndex[i]; j++) {
                    cPoints[count++] = cps[(i << 8) + j];
                }
                index[i] = count;
            }
            size = count;
        }

        /**
         * Get first CM(combining mark) with the combining class value cClass.
         * 
         * @param cClass
         *            : combining class value.
         * @return combining mark codepoint or 0 if no combining make with class
         *         value cClass
         */
        char GetFirstCM(int cClass) {
            curClass = cClass;
            if (cPoints == null || cClass == 0
                    || index[cClass] == index[cClass - 1]) {
                return 0;
            }
            pos = 1;
            return cPoints[index[cClass - 1]];
        }

        /**
         * Get next CM(combining mark) with the combining class value cClass.
         * Return combining mark codepoint or 0 if no next CM.
         */
        char GetNextCM() {
            if (cPoints == null
                    || index[curClass] == (index[curClass - 1] + pos)) {
                return 0;
            }
            return cPoints[index[curClass - 1] + (pos++)];
        }

        // private data members
        int pos;
        int curClass;
    }

    private static final class BuildTable implements TrieBuilder.DataManipulate {
        // package private methods ------------------------------------------

        /**
         * For construction of the Trie tables. Has to be labeled public
         * 
         * @param cp The value of the code point.
         * @param offset The value of the offset.
         * @return data offset or 0
         */
        public int getFoldedValue(int cp, int offset) {
            int limit = cp + 0x400;
            while (cp < limit) {
                int value = m_mapping_.getValue(cp);
                boolean inBlockZero = m_mapping_.isInZeroBlock(cp);
                int tag = getCETag(value);
                if (inBlockZero == true) {
                    cp += TrieBuilder.DATA_BLOCK_LENGTH;
                } else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ || tag == CE_NOT_FOUND_TAG_))) {
                    // These are values that are starting in either UCA
                    // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG).
                    // Presence of these tags means that there is nothing in
                    // this position and that it should be skipped.
                    return RuleBasedCollator.CE_SPECIAL_FLAG_
                            | (CE_SURROGATE_TAG_ << 24) | offset;
                } else {
                    ++cp;
                }
            }
            return 0;
        }

        // package private constructor --------------------------------------

        /**
         * Returns a table
         */
        BuildTable(CollationRuleParser parser) {
            m_collator_ = new RuleBasedCollator();
            m_collator_.setWithUCAData();
            MaxExpansionTable maxet = new MaxExpansionTable();
            MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable();
            m_options_ = parser.m_options_;
            m_expansions_ = new ArrayList();
            // Do your own mallocs for the structure, array and have linear
            // Latin 1
            int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_
                    | (CE_NOT_FOUND_TAG_ << 24);
            // temporary fix for jb3822, 0x100000 -> 30000
            m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue,
                    trieinitialvalue, true);
            m_prefixLookup_ = new HashMap();
            // uhash_open(prefixLookupHash, prefixLookupComp);
            m_contractions_ = new ContractionTable(m_mapping_);
            // copy UCA's maxexpansion and merge as we go along
            m_maxExpansions_ = maxet;
            // adding an extra initial value for easier manipulation
            for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) {
                maxet.m_endExpansionCE_.add(Integer.valueOf(
                        RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
                maxet.m_expansionCESize_.add(Byte.valueOf(
                        RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
            }
            m_maxJamoExpansions_ = maxjet;

            m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_];
            m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_];
            Arrays.fill(m_unsafeCP_, (byte) 0);
            Arrays.fill(m_contrEndCP_, (byte) 0);
        }

        /**
         * Duplicating a BuildTable. Not all data will be duplicated into their
         * own object.
         * 
         * @param table
         *            to clone
         */
        BuildTable(BuildTable table) {
            m_collator_ = table.m_collator_;
            m_mapping_ = new IntTrieBuilder(table.m_mapping_);
            m_expansions_ = new ArrayList(table.m_expansions_);
            m_contractions_ = new ContractionTable(table.m_contractions_);
            m_contractions_.m_mapping_ = m_mapping_;
            m_options_ = table.m_options_;
            m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_);
            m_maxJamoExpansions_ = new MaxJamoExpansionTable(
                    table.m_maxJamoExpansions_);
            m_unsafeCP_ = new byte[table.m_unsafeCP_.length];
            System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0,
                    m_unsafeCP_.length);
            m_contrEndCP_ = new byte[table.m_contrEndCP_.length];
            System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0,
                    m_contrEndCP_.length);
        }

        // package private data members -------------------------------------

        RuleBasedCollator m_collator_;
        IntTrieBuilder m_mapping_;
        List m_expansions_;
        ContractionTable m_contractions_;
        // UCATableHeader image;
        CollationRuleParser.OptionSet m_options_;
        MaxExpansionTable m_maxExpansions_;
        MaxJamoExpansionTable m_maxJamoExpansions_;
        byte m_unsafeCP_[];
        byte m_contrEndCP_[];
        Map m_prefixLookup_;
        CombinClassTable cmLookup = null;
    }

    private static class Elements {
        // package private data members -------------------------------------

        String m_prefixChars_;
        int m_prefix_;
        String m_uchars_;
        /**
         * Working string
         */
        String m_cPoints_;
        /**
         * Offset to the working string
         */
        int m_cPointsOffset_;
        /**
         * These are collation elements - there could be more than one - in case
         * of expansion
         */
        int m_CEs_[];
        int m_CELength_;
        /**
         * This is the value element maps in original table
         */
        int m_mapCE_;
        int m_sizePrim_[];
        int m_sizeSec_[];
        int m_sizeTer_[];
        boolean m_variableTop_;
        boolean m_caseBit_;

        // package private constructors -------------------------------------

        /**
         * Package private constructor
         */
        Elements() {
            m_sizePrim_ = new int[128];
            m_sizeSec_ = new int[128];
            m_sizeTer_ = new int[128];
            m_CEs_ = new int[256];
            m_CELength_ = 0;
        }

        /**
         * Package private constructor
         */
        Elements(Elements element) {
            m_prefixChars_ = element.m_prefixChars_;
            m_prefix_ = element.m_prefix_;
            m_uchars_ = element.m_uchars_;
            m_cPoints_ = element.m_cPoints_;
            m_cPointsOffset_ = element.m_cPointsOffset_;
            m_CEs_ = element.m_CEs_;
            m_CELength_ = element.m_CELength_;
            m_mapCE_ = element.m_mapCE_;
            m_sizePrim_ = element.m_sizePrim_;
            m_sizeSec_ = element.m_sizeSec_;
            m_sizeTer_ = element.m_sizeTer_;
            m_variableTop_ = element.m_variableTop_;
            m_caseBit_ = element.m_caseBit_;
        }

        // package private methods -------------------------------------------

        /**
         * Initializing the elements
         */
        public void clear() {
            m_prefixChars_ = null;
            m_prefix_ = 0;
            m_uchars_ = null;
            m_cPoints_ = null;
            m_cPointsOffset_ = 0;
            m_CELength_ = 0;
            m_mapCE_ = 0;
            Arrays.fill(m_sizePrim_, 0);
            Arrays.fill(m_sizeSec_, 0);
            Arrays.fill(m_sizeTer_, 0);
            m_variableTop_ = false;
            m_caseBit_ = false;
        }

        /**
         * Hashcode calculation for token
         * 
         * @return the hashcode
         */
        public int hashCode() {
            String str = m_cPoints_.substring(m_cPointsOffset_);
            return str.hashCode();
        }

        /**
         * Equals calculation
         * 
         * @param target Object to compare
         * @return true if target is the same as this object
         */
        public boolean equals(Object target) {
            if (target == this) {
                return true;
            }
            if (target instanceof Elements) {
                Elements t = (Elements) target;
                int size = m_cPoints_.length() - m_cPointsOffset_;
                if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) {
                    return t.m_cPoints_.regionMatches(t.m_cPointsOffset_,
                            m_cPoints_, m_cPointsOffset_, size);
                }
            }
            return false;
        }
    }

    // private data member ---------------------------------------------------

    /**
     * Maximum strength used in CE building
     */
    private static final int CE_BASIC_STRENGTH_LIMIT_ = 3;
    /**
     * Maximum collation strength
     */
    private static final int CE_STRENGTH_LIMIT_ = 16;
    /**
     * Strength mask array, used in inverse UCA
     */
    private static final int STRENGTH_MASK_[] = { 0xFFFF0000, 0xFFFFFF00,
        0xFFFFFFFF };
    /**
     * CE tag for not found
     */
    private static final int CE_NOT_FOUND_ = 0xF0000000;
    /**
     * CE tag for not found
     */
    private static final int CE_NOT_FOUND_TAG_ = 0;
    /**
     * This code point results in an expansion
     */
    private static final int CE_EXPANSION_TAG_ = 1;
    /**
     * Start of a contraction
     */
    private static final int CE_CONTRACTION_TAG_ = 2;
    /*
     * Thai character - do the reordering
     */
    // private static final int CE_THAI_TAG_ = 3;
    /*
     * Charset processing, not yet implemented
     */
    // private static final int CE_CHARSET_TAG_ = 4;
    /**
     * Lead surrogate that is tailored and doesn't start a contraction
     */
    private static final int CE_SURROGATE_TAG_ = 5;
    /*
     * AC00-D7AF
     */
    // private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
    /*
     * D800-DBFF
     */
    // private static final int CE_LEAD_SURROGATE_TAG_ = 7;
    /*
     * DC00-DFFF
     */
    // private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
    /*
     * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
     */
    // private static final int CE_CJK_IMPLICIT_TAG_ = 9;
    private static final int CE_IMPLICIT_TAG_ = 10;
    private static final int CE_SPEC_PROC_TAG_ = 11;
    /**
     * This is a three byte primary with starting secondaries and tertiaries. It
     * fits in a single 32 bit CE and is used instead of expansion to save space
     * without affecting the performance (hopefully)
     */
    private static final int CE_LONG_PRIMARY_TAG_ = 12;
    /**
     * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each
     * latin 1 char + some power of two for hashing the rest of the chars. Size
     * in bytes
     */
    private static final int UNSAFECP_TABLE_SIZE_ = 1056;
    /**
     * Mask value down to "some power of two" -1. Number of bits, not num of
     * bytes.
     */
    private static final int UNSAFECP_TABLE_MASK_ = 0x1fff;
    /**
     * Case values
     */
    private static final int UPPER_CASE_ = 0x80;
    private static final int MIXED_CASE_ = 0x40;
    private static final int LOWER_CASE_ = 0x00;
    /*
     * Initial table size
     */
    // private static final int INIT_TABLE_SIZE_ = 1028;
    /*
     * Header size, copied from ICU4C, to be changed when that value changes
     */
    // private static final int HEADER_SIZE_ = 0xC4;
    /**
     * Contraction table new element indicator
     */
    private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF;
    /**
     * Parser for the rules
     */
    private CollationRuleParser m_parser_;
    /**
     * Utility UCA collation element iterator
     */
    private CollationElementIterator m_utilColEIter_;
    /**
     * Utility data members
     */
    private CEGenerator m_utilGens_[] = { new CEGenerator(), new CEGenerator(),
        new CEGenerator() };
    private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_];
    private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_];
    private Elements m_utilElement_ = new Elements();
    private Elements m_utilElement2_ = new Elements();
    private CollationRuleParser.Token m_utilToken_ = new CollationRuleParser.Token();
    private int m_utilCountBuffer_[] = new int[6];
    private long m_utilLongBuffer_[] = new long[5];
    private WeightRange m_utilLowerWeightRange_[] = { new WeightRange(),
        new WeightRange(), new WeightRange(), new WeightRange(),
        new WeightRange() };
    private WeightRange m_utilUpperWeightRange_[] = { new WeightRange(),
        new WeightRange(), new WeightRange(), new WeightRange(),
        new WeightRange() };
    private WeightRange m_utilWeightRange_ = new WeightRange();
    private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
    private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
    private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
    // Flag indicating a combining marks table is required or not.
    private static boolean buildCMTabFlag = false;

    // private methods -------------------------------------------------------

    /**
     * @param listheader
     *            parsed rule tokens
     * @exception Exception
     *                thrown when internal error occurs
     */
    private void initBuffers(CollationRuleParser.TokenListHeader listheader)
            throws Exception {
        CollationRuleParser.Token token = listheader.m_last_;
        Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0);

        token.m_toInsert_ = 1;
        m_utilIntBuffer_[token.m_strength_] = 1;
        while (token.m_previous_ != null) {
            if (token.m_previous_.m_strength_ < token.m_strength_) {
                // going up
                m_utilIntBuffer_[token.m_strength_] = 0;
                m_utilIntBuffer_[token.m_previous_.m_strength_]++;
            } else if (token.m_previous_.m_strength_ > token.m_strength_) {
                // going down
                m_utilIntBuffer_[token.m_previous_.m_strength_] = 1;
            } else {
                m_utilIntBuffer_[token.m_strength_]++;
            }
            token = token.m_previous_;
            token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
        }

        token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
        INVERSE_UCA_.getInverseGapPositions(listheader);

        token = listheader.m_first_;
        int fstrength = Collator.IDENTICAL;
        int initstrength = Collator.IDENTICAL;

        m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_,
                listheader.m_baseContCE_, Collator.PRIMARY);
        m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_,
                listheader.m_baseContCE_, Collator.SECONDARY);
        m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_,
                listheader.m_baseContCE_, Collator.TERTIARY);
        while (token != null) {
            fstrength = token.m_strength_;
            if (fstrength < initstrength) {
                initstrength = fstrength;
                if (listheader.m_pos_[fstrength] == -1) {
                    while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) {
                        fstrength--;
                    }
                    if (listheader.m_pos_[fstrength] == -1) {
                        throw new Exception("Internal program error");
                    }
                }
                if (initstrength == Collator.TERTIARY) {
                    // starting with tertiary
                    m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
                    m_utilCEBuffer_[Collator.SECONDARY] = listheader.m_gapsLo_[fstrength * 3 + 1];
                    m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator(
                            m_utilGens_[Collator.TERTIARY],
                            listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                            fstrength);
                } else if (initstrength == Collator.SECONDARY) {
                    // secondaries
                    m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
                    m_utilCEBuffer_[Collator.SECONDARY] = getCEGenerator(
                            m_utilGens_[Collator.SECONDARY],
                            listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                            fstrength);
                    m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.TERTIARY], token,
                            Collator.TERTIARY);
                } else {
                    // primaries
                    m_utilCEBuffer_[Collator.PRIMARY] = getCEGenerator(
                            m_utilGens_[Collator.PRIMARY],
                            listheader.m_gapsLo_, listheader.m_gapsHi_, token,
                            fstrength);
                    m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.SECONDARY], token,
                            Collator.SECONDARY);
                    m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.TERTIARY], token,
                            Collator.TERTIARY);
                }
            } else {
                if (token.m_strength_ == Collator.TERTIARY) {
                    m_utilCEBuffer_[Collator.TERTIARY] = getNextGenerated(m_utilGens_[Collator.TERTIARY]);
                } else if (token.m_strength_ == Collator.SECONDARY) {
                    m_utilCEBuffer_[Collator.SECONDARY] = getNextGenerated(m_utilGens_[Collator.SECONDARY]);
                    m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.TERTIARY], token,
                            Collator.TERTIARY);
                } else if (token.m_strength_ == Collator.PRIMARY) {
                    m_utilCEBuffer_[Collator.PRIMARY] = getNextGenerated(m_utilGens_[Collator.PRIMARY]);
                    m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.SECONDARY], token,
                            Collator.SECONDARY);
                    m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
                            m_utilGens_[Collator.TERTIARY], token,
                            Collator.TERTIARY);
                }
            }
            doCE(m_utilCEBuffer_, token);
            token = token.m_next_;
        }
    }

    /**
     * Get the next generated ce
     * 
     * @param g
     *            ce generator
     * @return next generated ce
     */
    private int getNextGenerated(CEGenerator g) {
        g.m_current_ = nextWeight(g);
        return g.m_current_;
    }

    /**
     * @param g
     *            CEGenerator
     * @param token
     *            rule token
     * @param strength
     * @return ce generator
     * @exception Exception
     *                thrown when internal error occurs
     */
    private int getSimpleCEGenerator(CEGenerator g,
            CollationRuleParser.Token token, int strength) throws Exception {
        int high, low, count = 1;
        int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF;

        if (strength == Collator.SECONDARY) {
            low = RuleBasedCollator.COMMON_TOP_2_ << 24;
            high = 0xFFFFFFFF;
            count = 0xFF - RuleBasedCollator.COMMON_TOP_2_;
        } else {
            low = RuleBasedCollator.BYTE_COMMON_ << 24; // 0x05000000;
            high = 0x40000000;
            count = 0x40 - RuleBasedCollator.BYTE_COMMON_;
        }

        if (token.m_next_ != null && token.m_next_.m_strength_ == strength) {
            count = token.m_next_.m_toInsert_;
        }

        g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
                g.m_ranges_);
        g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24;

        if (g.m_rangesLength_ == 0) {
            throw new Exception("Internal program error");
        }
        return g.m_current_;
    }

    /**
     * Combines 2 ce into one with respect to the argument strength
     * 
     * @param ce1
     *            first ce
     * @param ce2
     *            second ce
     * @param strength
     *            strength to use
     * @return combined ce
     */
    private static int mergeCE(int ce1, int ce2, int strength) {
        int mask = RuleBasedCollator.CE_TERTIARY_MASK_;
        if (strength == Collator.SECONDARY) {
            mask = RuleBasedCollator.CE_SECONDARY_MASK_;
        } else if (strength == Collator.PRIMARY) {
            mask = RuleBasedCollator.CE_PRIMARY_MASK_;
        }
        ce1 &= mask;
        ce2 &= mask;
        switch (strength) {
        case Collator.PRIMARY:
            return ce1 | ce2 >>> 16;
        case Collator.SECONDARY:
            return ce1 << 16 | ce2 << 8;
        default:
            return ce1 << 24 | ce2 << 16;
        }
    }

    /**
     * @param g
     *            CEGenerator
     * @param lows
     *            low gap array
     * @param highs
     *            high gap array
     * @param token
     *            rule token
     * @param fstrength
     * @exception Exception
     *                thrown when internal error occurs
     */
    private int getCEGenerator(CEGenerator g, int lows[], int highs[],
            CollationRuleParser.Token token, int fstrength) throws Exception {
        int strength = token.m_strength_;
        int low = lows[fstrength * 3 + strength];
        int high = highs[fstrength * 3 + strength];
        int maxbyte = 0;
        if (strength == Collator.TERTIARY) {
            maxbyte = 0x3F;
        } else if (strength == Collator.PRIMARY) {
            maxbyte = 0xFE;
        } else {
            maxbyte = 0xFF;
        }

        int count = token.m_toInsert_;

        if (Utility.compareUnsigned(low, high) >= 0
                && strength > Collator.PRIMARY) {
            int s = strength;
            while (true) {
                s--;
                if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
                    if (strength == Collator.SECONDARY) {
                        if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
                            // Override if low range is less than
                            // UCOL_COMMON_TOP2.
                            low = RuleBasedCollator.COMMON_TOP_2_ << 24;
                        }
                        high = 0xFFFFFFFF;
                    } else {
                        if (low < RuleBasedCollator.COMMON_BOTTOM_3 << 24) {
                            // Override if low range is less than
                            // UCOL_COMMON_BOT3.
                            low = RuleBasedCollator.COMMON_BOTTOM_3 << 24;
                        }
                        high = 0x40000000;
                    }
                    break;
                }
                if (s < 0) {
                    throw new Exception("Internal program error");
                }
            }
        }
        if(0 <= low && low < 0x02000000) {  // unsigned comparison < 0x02000000
            // We must not use CE weight byte 02, so we set it as the minimum lower bound.
            // See http://site.icu-project.org/design/collation/bytes
            low = 0x02000000;
        }

        if (strength == Collator.SECONDARY) { // similar as simple
            if (Utility.compareUnsigned(low,
                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
                    && Utility.compareUnsigned(low,
                            RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
                low = RuleBasedCollator.COMMON_TOP_2_ << 24;
            }
            if (Utility.compareUnsigned(high,
                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0
                    && Utility.compareUnsigned(high,
                            RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
                high = RuleBasedCollator.COMMON_TOP_2_ << 24;
            }
            if (Utility.compareUnsigned(low,
                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
                g.m_rangesLength_ = allocateWeights(
                        RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, high,
                        count, maxbyte, g.m_ranges_);
                g.m_current_ = nextWeight(g);
                // g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
                return g.m_current_;
            }
        }

        g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
                g.m_ranges_);
        if (g.m_rangesLength_ == 0) {
            throw new Exception("Internal program error");
        }
        g.m_current_ = nextWeight(g);
        return g.m_current_;
    }

    /**
     * @param ceparts
     *            list of collation elements parts
     * @param token
     *            rule token
     * @exception Exception
     *                thrown when forming case bits for expansions fails
     */
    private void doCE(int ceparts[], CollationRuleParser.Token token)
            throws Exception {
        // this one makes the table and stuff
        // int noofbytes[] = new int[3];
        for (int i = 0; i < 3; i++) {
            // noofbytes[i] = countBytes(ceparts[i]);
            m_utilIntBuffer_[i] = countBytes(ceparts[i]);
        }

        // Here we have to pack CEs from parts
        int cei = 0;
        int value = 0;

        while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1]
                || cei < m_utilIntBuffer_[2]) {
            if (cei > 0) {
                value = RuleBasedCollator.CE_CONTINUATION_MARKER_;
            } else {
                value = 0;
            }

            if ((cei << 1) < m_utilIntBuffer_[0]) {
                value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) << 16;
            }
            if (cei < m_utilIntBuffer_[1]) {
                value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8;
            }

            if (cei < m_utilIntBuffer_[2]) {
                value |= ((ceparts[2] >> (32 - ((cei + 1) << 3))) & 0x3F);
            }
            token.m_CE_[cei] = value;
            cei++;
        }
        if (cei == 0) { // totally ignorable
            token.m_CELength_ = 1;
            token.m_CE_[0] = 0;
        } else { // there is at least something
            token.m_CELength_ = cei;
        }

        // Case bits handling for expansion
        if (token.m_CE_[0] != 0) { // case bits should be set only for
                                   // non-ignorables
            token.m_CE_[0] &= 0xFFFFFF3F; // Clean the case bits field
            int cSize = (token.m_source_ & 0xFF000000) >>> 24;
            int startoftokenrule = token.m_source_ & 0x00FFFFFF;

            if (cSize > 1) {
                // Do it manually
                String tokenstr = token.m_rules_.substring(startoftokenrule,
                                  startoftokenrule + cSize);
                token.m_CE_[0] |= getCaseBits(tokenstr);
            } else {
                // Copy it from the UCA
                int caseCE = getFirstCE(token.m_rules_.charAt(startoftokenrule));
                token.m_CE_[0] |= (caseCE & 0xC0);
            }
        }
    }

    /**
     * Count the number of non-zero bytes used in the ce
     * 
     * @param ce
     * @return number of non-zero bytes used in ce
     */
    private static final int countBytes(int ce) {
        int mask = 0xFFFFFFFF;
        int result = 0;
        while (mask != 0) {
            if ((ce & mask) != 0) {
                result++;
            }
            mask >>>= 8;
        }
        return result;
    }

    /**
     * We are ready to create collation elements
     * 
     * @param t
     *            build table to insert
     * @param lh
     *            rule token list header
     */
    private void createElements(BuildTable t,
            CollationRuleParser.TokenListHeader lh) {
        CollationRuleParser.Token tok = lh.m_first_;
        m_utilElement_.clear();
        while (tok != null) {
            // first, check if there are any expansions
            // if there are expansions, we need to do a little bit more
            // processing since parts of expansion can be tailored, while
            // others are not
            if (tok.m_expansion_ != 0) {
                int len = tok.m_expansion_ >>> 24;
                int currentSequenceLen = len;
                int expOffset = tok.m_expansion_ & 0x00FFFFFF;
                m_utilToken_.m_source_ = currentSequenceLen | expOffset;
                m_utilToken_.m_rules_ = m_parser_.m_source_;

                while (len > 0) {
                    currentSequenceLen = len;
                    while (currentSequenceLen > 0) {
                        m_utilToken_.m_source_ = (currentSequenceLen << 24)
                                | expOffset;
                        CollationRuleParser.Token expt = m_parser_.m_hashTable_.get(m_utilToken_);
                        if (expt != null
                                && expt.m_strength_ != CollationRuleParser.TOKEN_RESET_) {
                            // expansion is tailored
                            int noOfCEsToCopy = expt.m_CELength_;
                            for (int j = 0; j < noOfCEsToCopy; j++) {
                                tok.m_expCE_[tok.m_expCELength_ + j] = expt.m_CE_[j];
                            }
                            tok.m_expCELength_ += noOfCEsToCopy;
                            // never try to add codepoints and CEs.
                            // For some odd reason, it won't work.
                            expOffset += currentSequenceLen; // noOfCEsToCopy;
                            len -= currentSequenceLen; // noOfCEsToCopy;
                            break;
                        } else {
                            currentSequenceLen--;
                        }
                    }
                    if (currentSequenceLen == 0) {
                        // couldn't find any tailored subsequence, will have to
                        // get one from UCA. first, get the UChars from the
                        // rules then pick CEs out until there is no more and
                        // stuff them into expansion
                        m_utilColEIter_.setText(m_parser_.m_source_.substring(
                                expOffset, expOffset + 1));
                        while (true) {
                            int order = m_utilColEIter_.next();
                            if (order == CollationElementIterator.NULLORDER) {
                                break;
                            }
                            tok.m_expCE_[tok.m_expCELength_++] = order;
                        }
                        expOffset++;
                        len--;
                    }
                }
            } else {
                tok.m_expCELength_ = 0;
            }

            // set the ucaelement with obtained values
            m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_;

            // copy CEs
            System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0,
                    tok.m_CELength_);
            System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_,
                    tok.m_CELength_, tok.m_expCELength_);

            // copy UChars
            // We kept prefix and source kind of together, as it is a kind of a
            // contraction.
            // However, now we have to slice the prefix off the main thing -
            m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
            m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
            if (tok.m_prefix_ != 0) {
                // we will just copy the prefix here, and adjust accordingly in
                // the addPrefix function in ucol_elm. The reason is that we
                // need to add both composed AND decomposed elements to the
                // unsafe table.
                int size = tok.m_prefix_ >> 24;
                int offset = tok.m_prefix_ & 0x00FFFFFF;
                m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring(
                        offset, offset + size);
                size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
                offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24);
                m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
                        offset, offset + size);
            } else {
                m_utilElement_.m_prefixChars_ = null;
                int offset = tok.m_source_ & 0x00FFFFFF;
                int size = tok.m_source_ >>> 24;
                m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
                        offset, offset + size);
            }
            m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;

            boolean containCombinMarks = false;
            for (int i = 0; i < m_utilElement_.m_cPoints_.length()
                    - m_utilElement_.m_cPointsOffset_; i++) {
                if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) {
                    t.m_collator_.m_isJamoSpecial_ = true;
                    break;
                }
                if (!buildCMTabFlag) {
                    // check combining class
                    int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(i));  // TODO: review for handling supplementary characters
                    if ((fcd & 0xff) == 0) {
                        // reset flag when current char is not combining mark.
                        containCombinMarks = false;
                    } else {
                        containCombinMarks = true;
                    }
                }
            }

            if (!buildCMTabFlag && containCombinMarks) {
                buildCMTabFlag = true;
            }

            /***
             * // Case bits handling m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; //
             * Clean the case bits field if (m_utilElement_.m_cPoints_.length()
             * - m_utilElement_.m_cPointsOffset_ > 1) { // Do it manually
             * m_utilElement_.m_CEs_[0] |=
             * getCaseBits(m_utilElement_.m_cPoints_); } else { // Copy it from
             * the UCA int caseCE =
             * getFirstCE(m_utilElement_.m_cPoints_.charAt(0));
             * m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0); }
             ***/
            // and then, add it
            addAnElement(t, m_utilElement_);
            tok = tok.m_next_;
        }
    }

    /**
     * Testing if the string argument has case
     * 
     * @param src
     *            string
     * @return the case for this char array
     * @exception Exception
     *                thrown when internal program error occurs
     */
    private final int getCaseBits(String src) throws Exception {
        int uCount = 0;
        int lCount = 0;
        src = Normalizer.decompose(src, true);
        m_utilColEIter_.setText(src);
        for (int i = 0; i < src.length(); i++) {
            m_utilColEIter_.setText(src.substring(i, i + 1));
            int order = m_utilColEIter_.next();
            if (RuleBasedCollator.isContinuation(order)) {
                throw new Exception("Internal program error");
            }
            if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) == UPPER_CASE_) {
                uCount++;
            } else {
                char ch = src.charAt(i);
                if (UCharacter.isLowerCase(ch)) {
                    lCount++;
                } else {
                    if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) {
                        lCount++;
                    }
                }
            }
        }

        if (uCount != 0 && lCount != 0) {
            return MIXED_CASE_;
        } else if (uCount != 0) {
            return UPPER_CASE_;
        } else {
            return LOWER_CASE_;
        }
    }

    /**
     * Converts a char to the uppercase Kana
     * 
     * @param ch
     *            character to convert
     * @return the converted Kana character
     */
    private static final char toLargeKana(char ch) {
        if (0x3042 < ch && ch < 0x30ef) { // Kana range
            switch (ch - 0x3000) {
            case 0x41:
            case 0x43:
            case 0x45:
            case 0x47:
            case 0x49:
            case 0x63:
            case 0x83:
            case 0x85:
            case 0x8E:
            case 0xA1:
            case 0xA3:
            case 0xA5:
            case 0xA7:
            case 0xA9:
            case 0xC3:
            case 0xE3:
            case 0xE5:
            case 0xEE:
                ch++;
                break;
            case 0xF5:
                ch = 0x30AB;
                break;
            case 0xF6:
                ch = 0x30B1;
                break;
            }
        }
        return ch;
    }

    /**
     * Converts a char to the lowercase Kana
     * 
     * @param ch
     *            character to convert
     * @return the converted Kana character
     */
    private static final char toSmallKana(char ch) {
        if (0x3042 < ch && ch < 0x30ef) { // Kana range
            switch (ch - 0x3000) {
            case 0x42:
            case 0x44:
            case 0x46:
            case 0x48:
            case 0x4A:
            case 0x64:
            case 0x84:
            case 0x86:
            case 0x8F:
            case 0xA2:
            case 0xA4:
            case 0xA6:
            case 0xA8:
            case 0xAA:
            case 0xC4:
            case 0xE4:
            case 0xE6:
            case 0xEF:
                ch--;
                break;
            case 0xAB:
                ch = 0x30F5;
                break;
            case 0xB1:
                ch = 0x30F6;
                break;
            }
        }
        return ch;
    }

    /**
     * This should be connected to special Jamo handling.
     */
    private int getFirstCE(char ch) {
        m_utilColEIter_.setText(UCharacter.toString(ch));
        return m_utilColEIter_.next();
    }

    /**
     * This adds a read element, while testing for existence
     * 
     * @param t
     *            build table
     * @param element
     * @return ce
     */
    private int addAnElement(BuildTable t, Elements element) {
        List expansions = t.m_expansions_;
        element.m_mapCE_ = 0;

        if (element.m_CELength_ == 1) {
            element.m_mapCE_ = element.m_CEs_[0];

        } else {
            // unfortunately, it looks like we have to look for a long primary
            // here since in canonical closure we are going to hit some long
            // primaries from the first phase, and they will come back as
            // continuations/expansions destroying the effect of the previous
            // opitimization. A long primary is a three byte primary with
            // starting secondaries and tertiaries. It can appear in long runs
            // of only primary differences (like east Asian tailorings) also,
            // it should not be an expansion, as expansions would break with
            // this
            if (element.m_CELength_ == 2 // a two CE expansion
                    && RuleBasedCollator.isContinuation(element.m_CEs_[1])
                    && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that
                                                                                                              // has
                                                                                                              // only
                                                                                                              // primaries
                                                                                                              // in
                                                                                                              // continuation
                    && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
                    // a common secondary
                    && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_) // and
                                                                                      // a
                                                                                      // common
                                                                                      // tertiary
            ) {
                element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
                // a long primary special
                        | (CE_LONG_PRIMARY_TAG_ << 24)
                        // first and second byte of primary
                        | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
                        // third byte of primary
                        | ((element.m_CEs_[1] >> 24) & 0xFF);
            } else {
                // omitting expansion offset in builder
                // (HEADER_SIZE_ >> 2)
                int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
                        | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
                        | (addExpansion(expansions, element.m_CEs_[0]) << 4)
                        & 0xFFFFF0;

                for (int i = 1; i < element.m_CELength_; i++) {
                    addExpansion(expansions, element.m_CEs_[i]);
                }
                if (element.m_CELength_ <= 0xF) {
                    expansion |= element.m_CELength_;
                } else {
                    addExpansion(expansions, 0);
                }
                element.m_mapCE_ = expansion;
                setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
                        (byte) element.m_CELength_, t.m_maxExpansions_);
                if (isJamo(element.m_cPoints_.charAt(0))) {
                    t.m_collator_.m_isJamoSpecial_ = true;
                    setMaxJamoExpansion(element.m_cPoints_.charAt(0),
                            element.m_CEs_[element.m_CELength_ - 1],
                            (byte) element.m_CELength_, t.m_maxJamoExpansions_);
                }
            }
        }

        // We treat digits differently - they are "uber special" and should be
        // processed differently if numeric collation is on.
        int uniChar = 0;
        if ((element.m_uchars_.length() == 2)
                && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) {
            uniChar = UCharacterProperty.getRawSupplementary(element.m_uchars_
                    .charAt(0), element.m_uchars_.charAt(1));
        } else if (element.m_uchars_.length() == 1) {
            uniChar = element.m_uchars_.charAt(0);
        }

        // Here, we either have one normal CE OR mapCE is set. Therefore, we
        // stuff only one element to the expansion buffer. When we encounter a
        // digit and we don't do numeric collation, we will just pick the CE
        // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE
        // && ucol_prv_getSpecialPrevCE). If we picked a special, further
        // processing will occur. If it's a simple CE, we'll return due
        // to how the loop is constructed.
        if (uniChar != 0 && UCharacter.isDigit(uniChar)) {
            // prepare the element
            int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
                    | (CollationElementIterator.CE_DIGIT_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
                    | 1;
            if (element.m_mapCE_ != 0) {
                // if there is an expansion, we'll pick it here
                expansion |= (addExpansion(expansions, element.m_mapCE_) << 4);
            } else {
                expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4);
            }
            element.m_mapCE_ = expansion;
        }

        // here we want to add the prefix structure.
        // I will try to process it as a reverse contraction, if possible.
        // prefix buffer is already reversed.

        if (element.m_prefixChars_ != null
                && element.m_prefixChars_.length() - element.m_prefix_ > 0) {
            // We keep the seen prefix starter elements in a hashtable we need
            // it to be able to distinguish between the simple codepoints and
            // prefix starters. Also, we need to use it for canonical closure.
            m_utilElement2_.m_caseBit_ = element.m_caseBit_;
            m_utilElement2_.m_CELength_ = element.m_CELength_;
            m_utilElement2_.m_CEs_ = element.m_CEs_;
            m_utilElement2_.m_mapCE_ = element.m_mapCE_;
            // m_utilElement2_.m_prefixChars_ = element.m_prefixChars_;
            m_utilElement2_.m_sizePrim_ = element.m_sizePrim_;
            m_utilElement2_.m_sizeSec_ = element.m_sizeSec_;
            m_utilElement2_.m_sizeTer_ = element.m_sizeTer_;
            m_utilElement2_.m_variableTop_ = element.m_variableTop_;
            m_utilElement2_.m_prefix_ = element.m_prefix_;
            m_utilElement2_.m_prefixChars_ = Normalizer.compose(
                    element.m_prefixChars_, false);
            m_utilElement2_.m_uchars_ = element.m_uchars_;
            m_utilElement2_.m_cPoints_ = element.m_cPoints_;
            m_utilElement2_.m_cPointsOffset_ = 0;

            if (t.m_prefixLookup_ != null) {
                Elements uCE = t.m_prefixLookup_.get(element);
                if (uCE != null) {
                    // there is already a set of code points here
                    element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element);
                } else { // no code points, so this spot is clean
                    element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element);
                    uCE = new Elements(element);
                    uCE.m_cPoints_ = uCE.m_uchars_;
                    t.m_prefixLookup_.put(uCE, uCE);
                }
                if (m_utilElement2_.m_prefixChars_.length() != element.m_prefixChars_
                        .length()
                        - element.m_prefix_
                        || !m_utilElement2_.m_prefixChars_.regionMatches(0,
                                element.m_prefixChars_, element.m_prefix_,
                                m_utilElement2_.m_prefixChars_.length())) {
                    // do it!
                    m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_,
                            m_utilElement2_);
                }
            }
        }

        // We need to use the canonical iterator here
        // the way we do it is to generate the canonically equivalent strings
        // for the contraction and then add the sequences that pass FCD check
        if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1
                && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2
                        && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) && UTF16
                        .isTrailSurrogate(element.m_cPoints_.charAt(1)))) {
            // this is a contraction, we should check whether a composed form
            // should also be included
            m_utilCanIter_.setSource(element.m_cPoints_);
            String source = m_utilCanIter_.next();
            while (source != null && source.length() > 0) {
                if (Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.NO) {
                    element.m_uchars_ = source;
                    element.m_cPoints_ = element.m_uchars_;
                    finalizeAddition(t, element);
                }
                source = m_utilCanIter_.next();
            }

            return element.m_mapCE_;
        } else {
            return finalizeAddition(t, element);
        }
    }

    /**
     * Adds an expansion ce to the expansion vector
     * 
     * @param expansions
     *            vector to add to
     * @param value
     *            of the expansion
     * @return the current position of the new element
     */
    private static final int addExpansion(List expansions, int value) {
        expansions.add(Integer.valueOf(value));
        return expansions.size() - 1;
    }

    /**
     * Looks for the maximum length of all expansion sequences ending with the
     * same collation element. The size required for maxexpansion and maxsize is
     * returned if the arrays are too small.
     * 
     * @param endexpansion
     *            the last expansion collation element to be added
     * @param expansionsize
     *            size of the expansion
     * @param maxexpansion
     *            data structure to store the maximum expansion data.
     * @returns size of the maxexpansion and maxsize used.
     */
    private static int setMaxExpansion(int endexpansion, byte expansionsize,
            MaxExpansionTable maxexpansion) {
        int start = 0;
        int limit = maxexpansion.m_endExpansionCE_.size();
        long unsigned = (long) endexpansion;
        unsigned &= 0xFFFFFFFFl;

        // using binary search to determine if last expansion element is
        // already in the array
        int result = -1;
        if (limit > 0) {
            while (start < limit - 1) {
                int mid = (start + limit) >> 1;
                long unsignedce = (maxexpansion.m_endExpansionCE_
                        .get(mid)).intValue();
                unsignedce &= 0xFFFFFFFFl;
                if (unsigned < unsignedce) {
                    limit = mid;
                } else {
                    start = mid;
                }
            }
    
            if ((maxexpansion.m_endExpansionCE_.get(start)).intValue() == endexpansion) {
                result = start;
            }
        }
        if (result > -1) {
            // found the ce in expansion, we'll just modify the size if it
            // is smaller
            Object currentsize = maxexpansion.m_expansionCESize_.get(result);
            if (((Byte) currentsize).byteValue() < expansionsize) {
                maxexpansion.m_expansionCESize_.set(result, Byte.valueOf(
                            expansionsize));
            }
        } else {
            // we'll need to squeeze the value into the array. initial
            // implementation. shifting the subarray down by 1
            maxexpansion.m_endExpansionCE_.add(start + 1, Integer.valueOf(endexpansion));
            maxexpansion.m_expansionCESize_.add(start + 1, Byte.valueOf(expansionsize));
        }
        return maxexpansion.m_endExpansionCE_.size();
    }

    /**
     * Sets the maximum length of all jamo expansion sequences ending with the
     * same collation element. The size required for maxexpansion and maxsize is
     * returned if the arrays are too small.
     * 
     * @param ch
     *            the jamo codepoint
     * @param endexpansion
     *            the last expansion collation element to be added
     * @param expansionsize
     *            size of the expansion
     * @param maxexpansion
     *            data structure to store the maximum expansion data.
     * @returns size of the maxexpansion and maxsize used.
     */
    private static int setMaxJamoExpansion(char ch, int endexpansion,
            byte expansionsize, MaxJamoExpansionTable maxexpansion) {
        boolean isV = true;
        if (ch >= 0x1100 && ch <= 0x1112) {
            // determines L for Jamo, doesn't need to store this since it is
            // never at the end of a expansion
            if (maxexpansion.m_maxLSize_ < expansionsize) {
                maxexpansion.m_maxLSize_ = expansionsize;
            }
            return maxexpansion.m_endExpansionCE_.size();
        }

        if (ch >= 0x1161 && ch <= 0x1175) {
            // determines V for Jamo
            if (maxexpansion.m_maxVSize_ < expansionsize) {
                maxexpansion.m_maxVSize_ = expansionsize;
            }
        }

        if (ch >= 0x11A8 && ch <= 0x11C2) {
            isV = false;
            // determines T for Jamo
            if (maxexpansion.m_maxTSize_ < expansionsize) {
                maxexpansion.m_maxTSize_ = expansionsize;
            }
        }

        int pos = maxexpansion.m_endExpansionCE_.size();
        while (pos > 0) {
            pos--;
            if ((maxexpansion.m_endExpansionCE_.get(pos)).intValue() == endexpansion) {
                return maxexpansion.m_endExpansionCE_.size();
            }
        }
        maxexpansion.m_endExpansionCE_.add(Integer.valueOf(endexpansion));
        maxexpansion.m_isV_.add(isV ? Boolean.TRUE : Boolean.FALSE);

        return maxexpansion.m_endExpansionCE_.size();
    }

    /**
     * Adds a prefix to the table
     * 
     * @param t
     *            build table to update
     * @param CE
     *            collation element to add
     * @param element
     *            rule element to add
     * @return modified ce
     */
    private int addPrefix(BuildTable t, int CE, Elements element) {
        // currently the longest prefix we're supporting in Japanese is two
        // characters long. Although this table could quite easily mimic
        // complete contraction stuff there is no good reason to make a general
        // solution, as it would require some error prone messing.
        ContractionTable contractions = t.m_contractions_;
        String oldCP = element.m_cPoints_;
        int oldCPOffset = element.m_cPointsOffset_;

        contractions.m_currentTag_ = CE_SPEC_PROC_TAG_;
        // here, we will normalize & add prefix to the table.
        int size = element.m_prefixChars_.length() - element.m_prefix_;
        for (int j = 1; j < size; j++) {
            // First add NFD prefix chars to unsafe CP hash table
            // Unless it is a trail surrogate, which is handled algoritmically
            // and shouldn't take up space in the table.
            char ch = element.m_prefixChars_.charAt(j + element.m_prefix_);
            if (!UTF16.isTrailSurrogate(ch)) {
                unsafeCPSet(t.m_unsafeCP_, ch);
            }
        }

        // StringBuffer reversed = new StringBuffer();
        m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
        for (int j = 0; j < size; j++) {
            // prefixes are going to be looked up backwards
            // therefore, we will promptly reverse the prefix buffer...
            int offset = element.m_prefixChars_.length() - j - 1;
            m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset));
        }
        element.m_prefixChars_ = m_utilStringBuffer_.toString();
        element.m_prefix_ = 0;

        // the first codepoint is also unsafe, as it forms a 'contraction' with
        // the prefix
        if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) {
            unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0));
        }

        element.m_cPoints_ = element.m_prefixChars_;
        element.m_cPointsOffset_ = element.m_prefix_;

        // Add the last char of the contraction to the contraction-end hash
        // table. unless it is a trail surrogate, which is handled
        // algorithmically and shouldn't be in the table
        if (!UTF16.isTrailSurrogate(element.m_cPoints_
                .charAt(element.m_cPoints_.length() - 1))) {
            ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
                    .charAt(element.m_cPoints_.length() - 1));
        }
        // First we need to check if contractions starts with a surrogate
        // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);

        // If there are any Jamos in the contraction, we should turn on special
        // processing for Jamos
        if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) {
            t.m_collator_.m_isJamoSpecial_ = true;
        }
        // then we need to deal with it
        // we could aready have something in table - or we might not
        if (!isPrefix(CE)) {
            // if it wasn't contraction, we wouldn't end up here
            int firstContractionOffset = addContraction(contractions,
                    CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
            int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
            addContraction(contractions, firstContractionOffset,
                    element.m_prefixChars_.charAt(element.m_prefix_), newCE);
            addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
                    CE);
            CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset);
        } else {
            // we are adding to existing contraction
            // there were already some elements in the table, so we need to add
            // a new contraction
            // Two things can happen here: either the codepoint is already in
            // the table, or it is not
            char ch = element.m_prefixChars_.charAt(element.m_prefix_);
            int position = findCP(contractions, CE, ch);
            if (position > 0) {
                // if it is we just continue down the chain
                int eCE = getCE(contractions, CE, position);
                int newCE = processContraction(contractions, element, eCE);
                setContraction(contractions, CE, position, ch, newCE);
            } else {
                // if it isn't, we will have to create a new sequence
                processContraction(contractions, element, CE_NOT_FOUND_);
                insertContraction(contractions, CE, ch, element.m_mapCE_);
            }
        }

        element.m_cPoints_ = oldCP;
        element.m_cPointsOffset_ = oldCPOffset;

        return CE;
    }

    /**
     * Checks if the argument ce is a contraction
     * 
     * @param CE
     *            collation element
     * @return true if argument ce is a contraction
     */
    private static final boolean isContraction(int CE) {
        return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_);
    }

    /**
     * Checks if the argument ce has a prefix
     * 
     * @param CE
     *            collation element
     * @return true if argument ce has a prefix
     */
    private static final boolean isPrefix(int CE) {
        return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_);
    }

    /**
     * Checks if the argument ce is special
     * 
     * @param CE
     *            collation element
     * @return true if argument ce is special
     */
    private static final boolean isSpecial(int CE) {
        return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000;
    }

    /**
     * Checks if the argument ce has a prefix
     * 
     * @param CE
     *            collation element
     * @return true if argument ce has a prefix
     */
    private static final int getCETag(int CE) {
        return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> RuleBasedCollator.CE_TAG_SHIFT_;
    }

    /**
     * Gets the ce at position in contraction table
     * 
     * @param table
     *            contraction table
     * @param position
     *            offset to the contraction table
     * @return ce
     */
    private static final int getCE(ContractionTable table, int element,
            int position) {
        element &= 0xFFFFFF;
        BasicContractionTable tbl = getBasicContractionTable(table, element);

        if (tbl == null) {
            return CE_NOT_FOUND_;
        }
        if (position > tbl.m_CEs_.size() || position == -1) {
            return CE_NOT_FOUND_;
        } else {
            return tbl.m_CEs_.get(position).intValue();
        }
    }

    /**
     * Sets the unsafe character
     * 
     * @param table
     *            unsafe table
     * @param c
     *            character to be added
     */
    private static final void unsafeCPSet(byte table[], char c) {
        int hash = c;
        if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
            if (hash >= 0xd800 && hash <= 0xf8ff) {
                // Part of a surrogate, or in private use area.
                // These don't go in the table
                return;
            }
            hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
        }
        table[hash >> 3] |= (1 << (hash & 7));
    }

    /**
     * Sets the contraction end character
     * 
     * @param table
     *            contraction end table
     * @param c
     *            character to be added
     */
    private static final void ContrEndCPSet(byte table[], char c) {
        int hash = c;
        if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
            hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
        }
        table[hash >> 3] |= (1 << (hash & 7));
    }

    /**
     * Adds more contractions in table. If element is non existant, it creates
     * on. Returns element handle
     * 
     * @param table
     *            contraction table
     * @param element
     *            offset to the contraction table
     * @param codePoint
     *            codepoint to add
     * @param value
     * @return collation element
     */
    private static int addContraction(ContractionTable table, int element,
            char codePoint, int value) {
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            tbl = addAContractionElement(table);
            element = table.m_elements_.size() - 1;
        }

        tbl.m_CEs_.add(Integer.valueOf(value));
        tbl.m_codePoints_.append(codePoint);
        return constructSpecialCE(table.m_currentTag_, element);
    }

    /**
     * Adds a contraction element to the table
     * 
     * @param table
     *            contraction table to update
     * @return contraction
     */
    private static BasicContractionTable addAContractionElement(
            ContractionTable table) {
        BasicContractionTable result = new BasicContractionTable();
        table.m_elements_.add(result);
        return result;
    }

    /**
     * Constructs a special ce
     * 
     * @param tag
     *            special tag
     * @param CE
     *            collation element
     * @return a contraction ce
     */
    private static final int constructSpecialCE(int tag, int CE) {
        return RuleBasedCollator.CE_SPECIAL_FLAG_
                | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 0xFFFFFF);
    }

    /**
     * Sets and inserts the element that has a contraction
     * 
     * @param contractions
     *            contraction table
     * @param element
     *            contracting element
     * @param existingCE
     * @return contraction ce
     */
    private static int processContraction(ContractionTable contractions,
            Elements element, int existingCE) {
        int firstContractionOffset = 0;
        // end of recursion
        if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) {
            if (isContractionTableElement(existingCE)
                    && getCETag(existingCE) == contractions.m_currentTag_) {
                changeContraction(contractions, existingCE, (char) 0,
                        element.m_mapCE_);
                changeContraction(contractions, existingCE, (char) 0xFFFF,
                        element.m_mapCE_);
                return existingCE;
            } else {
                // can't do just that. existingCe might be a contraction,
                // meaning that we need to do another step
                return element.m_mapCE_;
            }
        }

        // this recursion currently feeds on the only element we have...
        // We will have to copy it in order to accomodate for both backward
        // and forward cycles
        // we encountered either an empty space or a non-contraction element
        // this means we are constructing a new contraction sequence
        element.m_cPointsOffset_++;
        if (!isContractionTableElement(existingCE)) {
            // if it wasn't contraction, we wouldn't end up here
            firstContractionOffset = addContraction(contractions,
                    CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, existingCE);
            int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
            addContraction(contractions, firstContractionOffset,
                    element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
            addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
                    existingCE);
            existingCE = constructSpecialCE(contractions.m_currentTag_,
                    firstContractionOffset);
        } else {
            // we are adding to existing contraction
            // there were already some elements in the table, so we need to add
            // a new contraction
            // Two things can happen here: either the codepoint is already in
            // the table, or it is not
            int position = findCP(contractions, existingCE, element.m_cPoints_
                    .charAt(element.m_cPointsOffset_));
            if (position > 0) {
                // if it is we just continue down the chain
                int eCE = getCE(contractions, existingCE, position);
                int newCE = processContraction(contractions, element, eCE);
                setContraction(contractions, existingCE, position,
                        element.m_cPoints_.charAt(element.m_cPointsOffset_),
                        newCE);
            } else {
                // if it isn't, we will have to create a new sequence
                int newCE = processContraction(contractions, element,
                        CE_NOT_FOUND_);
                insertContraction(contractions, existingCE, element.m_cPoints_
                        .charAt(element.m_cPointsOffset_), newCE);
            }
        }
        element.m_cPointsOffset_--;
        return existingCE;
    }

    /**
     * Checks if CE belongs to the contraction table
     * 
     * @param CE
     *            collation element to test
     * @return true if CE belongs to the contraction table
     */
    private static final boolean isContractionTableElement(int CE) {
        return isSpecial(CE)
                && (getCETag(CE) == CE_CONTRACTION_TAG_ || getCETag(CE) == CE_SPEC_PROC_TAG_);
    }

    /**
     * Gets the codepoint
     * 
     * @param table
     *            contraction table
     * @param element
     *            offset to the contraction element in the table
     * @param codePoint
     *            code point to look for
     * @return the offset to the code point
     */
    private static int findCP(ContractionTable table, int element,
            char codePoint) {
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            return -1;
        }

        int position = 0;
        while (codePoint > tbl.m_codePoints_.charAt(position)) {
            position++;
            if (position > tbl.m_codePoints_.length()) {
                return -1;
            }
        }
        if (codePoint == tbl.m_codePoints_.charAt(position)) {
            return position;
        } else {
            return -1;
        }
    }

    /**
     * Gets the contraction element out of the contraction table
     * 
     * @param table
     *            contraction table
     * @param offset
     *            to the element in the contraction table
     * @return basic contraction element at offset in the contraction table
     */
    private static final BasicContractionTable getBasicContractionTable(
            ContractionTable table, int offset) {
        offset &= 0xFFFFFF;
        if (offset == 0xFFFFFF) {
            return null;
        }
        return table.m_elements_.get(offset);
    }

    /**
     * Changes the contraction element
     * 
     * @param table
     *            contraction table
     * @param element
     *            offset to the element in the contraction table
     * @param codePoint
     *            codepoint
     * @param newCE
     *            new collation element
     * @return basic contraction element at offset in the contraction table
     */
    private static final int changeContraction(ContractionTable table,
            int element, char codePoint, int newCE) {
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            return 0;
        }
        int position = 0;
        while (codePoint > tbl.m_codePoints_.charAt(position)) {
            position++;
            if (position > tbl.m_codePoints_.length()) {
                return CE_NOT_FOUND_;
            }
        }
        if (codePoint == tbl.m_codePoints_.charAt(position)) {
            tbl.m_CEs_.set(position, Integer.valueOf(newCE));
            return element & 0xFFFFFF;
        } else {
            return CE_NOT_FOUND_;
        }
    }

    /**
     * Sets a part of contraction sequence in table. If element is non existant,
     * it creates on. Returns element handle.
     * 
     * @param table
     *            contraction table
     * @param element
     *            offset to the contraction table
     * @param offset
     * @param codePoint
     *            contraction character
     * @param value
     *            ce value
     * @return new contraction ce
     */
    private static final int setContraction(ContractionTable table,
            int element, int offset, char codePoint, int value) {
        element &= 0xFFFFFF;
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            tbl = addAContractionElement(table);
            element = table.m_elements_.size() - 1;
        }

        tbl.m_CEs_.set(offset, Integer.valueOf(value));
        tbl.m_codePoints_.setCharAt(offset, codePoint);
        return constructSpecialCE(table.m_currentTag_, element);
    }

    /**
     * Inserts a part of contraction sequence in table. Sequences behind the
     * offset are moved back. If element is non existent, it creates on.
     * 
     * @param table
     *            contraction
     * @param element
     *            offset to the table contraction
     * @param codePoint
     *            code point
     * @param value
     *            collation element value
     * @return contraction collation element
     */
    private static final int insertContraction(ContractionTable table,
            int element, char codePoint, int value) {
        element &= 0xFFFFFF;
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            tbl = addAContractionElement(table);
            element = table.m_elements_.size() - 1;
        }

        int offset = 0;
        while (tbl.m_codePoints_.charAt(offset) < codePoint
                && offset < tbl.m_codePoints_.length()) {
            offset++;
        }

        tbl.m_CEs_.add(offset, Integer.valueOf(value));
        tbl.m_codePoints_.insert(offset, codePoint);

        return constructSpecialCE(table.m_currentTag_, element);
    }

    /**
     * Finalize addition
     * 
     * @param t
     *            build table
     * @param element
     *            to add
     */
    private final static int finalizeAddition(BuildTable t, Elements element) {
        int CE = CE_NOT_FOUND_;
        // This should add a completely ignorable element to the
        // unsafe table, so that backward iteration will skip
        // over it when treating contractions.
        if (element.m_mapCE_ == 0) {
            for (int i = 0; i < element.m_cPoints_.length(); i++) {
                char ch = element.m_cPoints_.charAt(i);
                if (!UTF16.isTrailSurrogate(ch)) {
                    unsafeCPSet(t.m_unsafeCP_, ch);
                }
            }
        }

        if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) {
            // we're adding a contraction
            int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
            CE = t.m_mapping_.getValue(cp);
            CE = addContraction(t, CE, element);
        } else {
            // easy case
            CE = t.m_mapping_.getValue(element.m_cPoints_
                    .charAt(element.m_cPointsOffset_));

            if (CE != CE_NOT_FOUND_) {
                if (isContractionTableElement(CE)) {
                    // adding a non contraction element (thai, expansion,
                    // single) to already existing contraction
                    if (!isPrefix(element.m_mapCE_)) {
                        // we cannot reenter prefix elements - as we are going
                        // to create a dead loop
                        // Only expansions and regular CEs can go here...
                        // Contractions will never happen in this place
                        setContraction(t.m_contractions_, CE, 0, (char) 0,
                                element.m_mapCE_);
                        // This loop has to change the CE at the end of
                        // contraction REDO!
                        changeLastCE(t.m_contractions_, CE, element.m_mapCE_);
                    }
                } else {
                    t.m_mapping_
                            .setValue(element.m_cPoints_
                                    .charAt(element.m_cPointsOffset_),
                                    element.m_mapCE_);
                    if (element.m_prefixChars_ != null
                            && element.m_prefixChars_.length() > 0
                            && getCETag(CE) != CE_IMPLICIT_TAG_) {
                        // Add CE for standalone precontext char.
                        Elements origElem = new Elements();
                        origElem.m_prefixChars_ = null;
                        origElem.m_uchars_ = element.m_cPoints_;
                        origElem.m_cPoints_ = origElem.m_uchars_;
                        origElem.m_CEs_[0] = CE;
                        origElem.m_mapCE_ = CE;
                        origElem.m_CELength_ = 1;
                        finalizeAddition(t, origElem);
                    }
                }
            } else {
                t.m_mapping_.setValue(element.m_cPoints_
                        .charAt(element.m_cPointsOffset_), element.m_mapCE_);
            }
        }
        return CE;
    }

    /**
     * Note regarding surrogate handling: We are interested only in the single
     * or leading surrogates in a contraction. If a surrogate is somewhere else
     * in the contraction, it is going to be handled as a pair of code units, as
     * it doesn't affect the performance AND handling surrogates specially would
     * complicate code way too much.
     */
    private static int addContraction(BuildTable t, int CE, Elements element) {
        ContractionTable contractions = t.m_contractions_;
        contractions.m_currentTag_ = CE_CONTRACTION_TAG_;

        // First we need to check if contractions starts with a surrogate
        int cp = UTF16.charAt(element.m_cPoints_, 0);
        int cpsize = 1;
        if (UCharacter.isSupplementary(cp)) {
            cpsize = 2;
        }
        if (cpsize < element.m_cPoints_.length()) {
            // This is a real contraction, if there are other characters after
            // the first
            int size = element.m_cPoints_.length() - element.m_cPointsOffset_;
            for (int j = 1; j < size; j++) {
                // First add contraction chars to unsafe CP hash table
                // Unless it is a trail surrogate, which is handled
                // algoritmically and shouldn't take up space in the table.
                if (!UTF16.isTrailSurrogate(element.m_cPoints_
                        .charAt(element.m_cPointsOffset_ + j))) {
                    unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_
                            .charAt(element.m_cPointsOffset_ + j));
                }
            }
            // Add the last char of the contraction to the contraction-end
            // hash table. unless it is a trail surrogate, which is handled
            // algorithmically and shouldn't be in the table
            if (!UTF16.isTrailSurrogate(element.m_cPoints_
                    .charAt(element.m_cPoints_.length() - 1))) {
                ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
                        .charAt(element.m_cPoints_.length() - 1));
            }

            // If there are any Jamos in the contraction, we should turn on
            // special processing for Jamos
            if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) {
                t.m_collator_.m_isJamoSpecial_ = true;
            }
            // then we need to deal with it
            // we could aready have something in table - or we might not
            element.m_cPointsOffset_ += cpsize;
            if (!isContraction(CE)) {
                // if it wasn't contraction, we wouldn't end up here
                int firstContractionOffset = addContraction(contractions,
                        CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
                int newCE = processContraction(contractions, element,
                        CE_NOT_FOUND_);
                addContraction(contractions, firstContractionOffset,
                        element.m_cPoints_.charAt(element.m_cPointsOffset_),
                        newCE);
                addContraction(contractions, firstContractionOffset,
                        (char) 0xFFFF, CE);
                CE = constructSpecialCE(CE_CONTRACTION_TAG_,
                        firstContractionOffset);
            } else {
                // we are adding to existing contraction
                // there were already some elements in the table, so we need to
                // add a new contraction
                // Two things can happen here: either the codepoint is already
                // in the table, or it is not
                int position = findCP(contractions, CE, element.m_cPoints_
                        .charAt(element.m_cPointsOffset_));
                if (position > 0) {
                    // if it is we just continue down the chain
                    int eCE = getCE(contractions, CE, position);
                    int newCE = processContraction(contractions, element, eCE);
                    setContraction(
                            contractions,
                            CE,
                            position,
                            element.m_cPoints_.charAt(element.m_cPointsOffset_),
                            newCE);
                } else {
                    // if it isn't, we will have to create a new sequence
                    int newCE = processContraction(contractions, element,
                            CE_NOT_FOUND_);
                    insertContraction(contractions, CE, element.m_cPoints_
                            .charAt(element.m_cPointsOffset_), newCE);
                }
            }
            element.m_cPointsOffset_ -= cpsize;
            t.m_mapping_.setValue(cp, CE);
        } else if (!isContraction(CE)) {
            // this is just a surrogate, and there is no contraction
            t.m_mapping_.setValue(cp, element.m_mapCE_);
        } else {
            // fill out the first stage of the contraction with the surrogate
            // CE
            changeContraction(contractions, CE, (char) 0, element.m_mapCE_);
            changeContraction(contractions, CE, (char) 0xFFFF, element.m_mapCE_);
        }
        return CE;
    }

    /**
     * this is for adding non contractions
     * 
     * @param table
     *            contraction table
     * @param element
     *            offset to the contraction table
     * @param value
     *            collation element value
     * @return new collation element
     */
    private static final int changeLastCE(ContractionTable table, int element,
            int value) {
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            return 0;
        }

        tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, Integer.valueOf(value));
        return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
    }

    /**
     * Given a set of ranges calculated by allocWeights(), iterate through the
     * weights. Sets the next weight in cegenerator.m_current_.
     * 
     * @param cegenerator
     *            object that contains ranges weight range array and its
     *            rangeCount
     * @return the next weight
     */
    private static int nextWeight(CEGenerator cegenerator) {
        if (cegenerator.m_rangesLength_ > 0) {
            // get maxByte from the .count field
            int maxByte = cegenerator.m_ranges_[0].m_count_;
            // get the next weight
            int weight = cegenerator.m_ranges_[0].m_start_;
            if (weight == cegenerator.m_ranges_[0].m_end_) {
                // this range is finished, remove it and move the following
                // ones up
                cegenerator.m_rangesLength_--;
                if (cegenerator.m_rangesLength_ > 0) {
                    System.arraycopy(cegenerator.m_ranges_, 1,
                            cegenerator.m_ranges_, 0,
                            cegenerator.m_rangesLength_);
                    cegenerator.m_ranges_[0].m_count_ = maxByte;
                    // keep maxByte in ranges[0]
                }
            } else {
                // increment the weight for the next value
                cegenerator.m_ranges_[0].m_start_ = incWeight(weight,
                        cegenerator.m_ranges_[0].m_length2_, maxByte);
            }
            return weight;
        }
        return -1;
    }

    /**
     * Increment the collation weight
     * 
     * @param weight
     *            to increment
     * @param length
     * @param maxByte
     * @return new incremented weight
     */
    private static final int incWeight(int weight, int length, int maxByte) {
        while (true) {
            int b = getWeightByte(weight, length);
            if (b < maxByte) {
                return setWeightByte(weight, length, b + 1);
            } else {
                // roll over, set this byte to BYTE_FIRST_TAILORED_ and
                // increment the previous one
                weight = setWeightByte(weight, length,
                        RuleBasedCollator.BYTE_FIRST_TAILORED_);
                --length;
            }
        }
    }

    /**
     * Gets the weight byte
     * 
     * @param weight
     * @param index
     * @return byte
     */
    private static final int getWeightByte(int weight, int index) {
        return (weight >> ((4 - index) << 3)) & 0xff;
    }

    /**
     * Set the weight byte in table
     * 
     * @param weight
     * @param index
     * @param b
     *            byte
     */
    private static final int setWeightByte(int weight, int index, int b) {
        index <<= 3;
        // 0xffffffff except a 00 "hole" for the index-th byte
        int mask;
        if (index < 32) {
            mask = 0xffffffff >>> index;
        } else {
            // Do not use int>>>32 because that does not shift at all
            // while we need it to become 0.
            // 
            // Java Language Specification (Third Edition) 15.19 Shift Operators:
            // "If the promoted type of the left-hand operand is int,
            // only the five lowest-order bits of the right-hand operand
            // are used as the shift distance.
            // It is as if the right-hand operand were subjected to
            // a bitwise logical AND operator & (§15.22.1) with the mask value 0x1f.
            // The shift distance actually used is therefore
            // always in the range 0 to 31, inclusive."
            mask = 0;
        }
        index = 32 - index;
        mask |= 0xffffff00 << index;
        return (weight & mask) | (b << index);
    }

    /**
     * Call getWeightRanges and then determine heuristically which ranges to use
     * for a given number of weights between (excluding) two limits
     * 
     * @param lowerLimit
     * @param upperLimit
     * @param n
     * @param maxByte
     * @param ranges
     * @return
     */
    private int allocateWeights(int lowerLimit, int upperLimit, int n,
            int maxByte, WeightRange ranges[]) {
        // number of usable byte values 3..maxByte
        int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1;
        // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_
        // countBytes to the power of index, m_utilLongBuffer_ for unsignedness
        // gcc requires explicit initialization
        m_utilLongBuffer_[0] = 1;
        m_utilLongBuffer_[1] = countBytes;
        m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes;
        m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes;
        m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes;
        int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte,
                countBytes, ranges);
        if (rangeCount <= 0) {
            return 0;
        }
        // what is the maximum number of weights with these ranges?
        long maxCount = 0;
        for (int i = 0; i < rangeCount; ++i) {
            maxCount += (long) ranges[i].m_count_
                    * m_utilLongBuffer_[4 - ranges[i].m_length_];
        }
        if (maxCount < n) {
            return 0;
        }
        // set the length2 and count2 fields
        for (int i = 0; i < rangeCount; ++i) {
            ranges[i].m_length2_ = ranges[i].m_length_;
            ranges[i].m_count2_ = ranges[i].m_count_;
        }
        // try until we find suitably large ranges
        while (true) {
            // get the smallest number of bytes in a range
            int minLength = ranges[0].m_length2_;
            // sum up the number of elements that fit into ranges of each byte
            // length
            Arrays.fill(m_utilCountBuffer_, 0);
            for (int i = 0; i < rangeCount; ++i) {
                m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_;
            }
            // now try to allocate n elements in the available short ranges
            if (n <= m_utilCountBuffer_[minLength]
                    + m_utilCountBuffer_[minLength + 1]) {
                // trivial cases, use the first few ranges
                maxCount = 0;
                rangeCount = 0;
                do {
                    maxCount += ranges[rangeCount].m_count2_;
                    ++rangeCount;
                } while (n > maxCount);
                break;
            } else if (n <= ranges[0].m_count2_ * countBytes) {
                // easy case, just make this one range large enough by
                // lengthening it once more, possibly split it
                rangeCount = 1;
                // calculate how to split the range between maxLength-1
                // (count1) and maxLength (count2)
                long power_1 = m_utilLongBuffer_[minLength
                        - ranges[0].m_length_];
                long power = power_1 * countBytes;
                int count2 = (int) ((n + power - 1) / power);
                int count1 = ranges[0].m_count_ - count2;
                // split the range
                if (count1 < 1) {
                    // lengthen the entire range to maxLength
                    lengthenRange(ranges, 0, maxByte, countBytes);
                } else {
                    // really split the range
                    // create a new range with the end and initial and current
                    // length of the old one
                    rangeCount = 2;
                    ranges[1].m_end_ = ranges[0].m_end_;
                    ranges[1].m_length_ = ranges[0].m_length_;
                    ranges[1].m_length2_ = minLength;
                    // set the end of the first range according to count1
                    int i = ranges[0].m_length_;
                    int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1;
                    // ranges[0].count and count1 may be >countBytes from
                    // merging adjacent ranges; b > maxByte is possible
                    if (b <= maxByte) {
                        ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i,
                                b);
                    } else {
                        ranges[0].m_end_ = setWeightByte(incWeight(
                                ranges[0].m_start_, i - 1, maxByte), i, b
                                - countBytes);
                    }
                    // set the bytes in the end weight at length + 1..length2
                    // to maxByte
                    b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8)
                            | maxByte; // this used to be 0xffffffff
                    ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i)
                            | (b >>> (i << 3)) & (b << ((4 - minLength) << 3));
                    // set the start of the second range to immediately follow
                    // the end of the first one
                    ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength,
                            maxByte);
                    // set the count values (informational)
                    ranges[0].m_count_ = count1;
                    ranges[1].m_count_ = count2;

                    ranges[0].m_count2_ = (int) (count1 * power_1);
                    // will be *countBytes when lengthened
                    ranges[1].m_count2_ = (int) (count2 * power_1);

                    // lengthen the second range to maxLength
                    lengthenRange(ranges, 1, maxByte, countBytes);
                }
                break;
            }
            // no good match, lengthen all minLength ranges and iterate
            for (int i = 0; ranges[i].m_length2_ == minLength; ++i) {
                lengthenRange(ranges, i, maxByte, countBytes);
            }
        }

        if (rangeCount > 1) {
            // sort the ranges by weight values
            Arrays.sort(ranges, 0, rangeCount);
        }

        // set maxByte in ranges[0] for ucol_nextWeight()
        ranges[0].m_count_ = maxByte;

        return rangeCount;
    }

    /**
     * Updates the range length
     * 
     * @param range
     *            weight range array
     * @param offset
     *            to weight range array
     * @param maxByte
     * @param countBytes
     * @return new length
     */
    private static final int lengthenRange(WeightRange range[], int offset,
            int maxByte, int countBytes) {
        int length = range[offset].m_length2_ + 1;
        range[offset].m_start_ = setWeightTrail(range[offset].m_start_, length,
                RuleBasedCollator.BYTE_FIRST_TAILORED_);
        range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length,
                maxByte);
        range[offset].m_count2_ *= countBytes;
        range[offset].m_length2_ = length;
        return length;
    }

    /**
     * Gets the weight
     * 
     * @param weight
     * @param length
     * @param trail
     * @return new weight
     */
    private static final int setWeightTrail(int weight, int length, int trail) {
        length = (4 - length) << 3;
        return (weight & (0xffffff00 << length)) | (trail << length);
    }

    /**
     * take two CE weights and calculate the possible ranges of weights between
     * the two limits, excluding them for weights with up to 4 bytes there are
     * up to 2*4-1=7 ranges
     * 
     * @param lowerLimit
     * @param upperLimit
     * @param maxByte
     * @param countBytes
     * @param ranges
     * @return weight ranges
     */
    private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte,
            int countBytes, WeightRange ranges[]) {
        // assume that both lowerLimit & upperLimit are not 0
        // get the lengths of the limits
        int lowerLength = lengthOfWeight(lowerLimit);
        int upperLength = lengthOfWeight(upperLimit);
        if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) {
            return 0;
        }
        // check that neither is a prefix of the other
        if (lowerLength < upperLength) {
            if (lowerLimit == truncateWeight(upperLimit, lowerLength)) {
                return 0;
            }
        }
        // if the upper limit is a prefix of the lower limit then the earlier
        // test lowerLimit >= upperLimit has caught it
        // reset local variables
        // With the limit lengths of 1..4, there are up to 7 ranges for
        // allocation:
        // range minimum length
        // lower[4] 4
        // lower[3] 3
        // lower[2] 2
        // middle 1
        // upper[2] 2
        // upper[3] 3
        // upper[4] 4
        // We are now going to calculate up to 7 ranges.
        // Some of them will typically overlap, so we will then have to merge
        // and eliminate ranges.

        // We have to clean cruft from previous invocations
        // before doing anything. C++ already does that
        for (int length = 0; length < 5; length++) {
            m_utilLowerWeightRange_[length].clear();
            m_utilUpperWeightRange_[length].clear();
        }
        m_utilWeightRange_.clear();

        int weight = lowerLimit;
        for (int length = lowerLength; length >= 2; --length) {
            m_utilLowerWeightRange_[length].clear();
            int trail = getWeightByte(weight, length);
            if (trail < maxByte) {
                m_utilLowerWeightRange_[length].m_start_ = incWeightTrail(
                        weight, length);
                m_utilLowerWeightRange_[length].m_end_ = setWeightTrail(weight,
                        length, maxByte);
                m_utilLowerWeightRange_[length].m_length_ = length;
                m_utilLowerWeightRange_[length].m_count_ = maxByte - trail;
            }
            weight = truncateWeight(weight, length - 1);
        }
        m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1);

        weight = upperLimit;
        // [0] and [1] are not used - this simplifies indexing,
        // m_utilUpperWeightRange_

        for (int length = upperLength; length >= 2; length--) {
            int trail = getWeightByte(weight, length);
            if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) {
                m_utilUpperWeightRange_[length].m_start_ = setWeightTrail(
                        weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
                m_utilUpperWeightRange_[length].m_end_ = decWeightTrail(weight,
                        length);
                m_utilUpperWeightRange_[length].m_length_ = length;
                m_utilUpperWeightRange_[length].m_count_ = trail
                        - RuleBasedCollator.BYTE_FIRST_TAILORED_;
            }
            weight = truncateWeight(weight, length - 1);
        }
        m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1);

        // set the middle range
        m_utilWeightRange_.m_length_ = 1;
        if (Utility.compareUnsigned(m_utilWeightRange_.m_end_,
                m_utilWeightRange_.m_start_) >= 0) {
            // if (m_utilWeightRange_.m_end_ >= m_utilWeightRange_.m_start_) {
            m_utilWeightRange_.m_count_ = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) >>> 24) + 1;
        } else {
            // eliminate overlaps
            // remove the middle range
            m_utilWeightRange_.m_count_ = 0;
            // reduce or remove the lower ranges that go beyond upperLimit
            for (int length = 4; length >= 2; --length) {
                if (m_utilLowerWeightRange_[length].m_count_ > 0
                        && m_utilUpperWeightRange_[length].m_count_ > 0) {
                    int start = m_utilUpperWeightRange_[length].m_start_;
                    int end = m_utilLowerWeightRange_[length].m_end_;
                    if (end >= start
                            || incWeight(end, length, maxByte) == start) {
                        // lower and upper ranges collide or are directly
                        // adjacent: merge these two and remove all shorter
                        // ranges
                        start = m_utilLowerWeightRange_[length].m_start_;
                        end = m_utilLowerWeightRange_[length].m_end_ = m_utilUpperWeightRange_[length].m_end_;
                        // merging directly adjacent ranges needs to subtract
                        // the 0/1 gaps in between;
                        // it may result in a range with count>countBytes
                        m_utilLowerWeightRange_[length].m_count_ = getWeightByte(
                                end, length)
                                - getWeightByte(start, length)
                                + 1
                                + countBytes
                                * (getWeightByte(end, length - 1) - getWeightByte(
                                        start, length - 1));
                        m_utilUpperWeightRange_[length].m_count_ = 0;
                        while (--length >= 2) {
                            m_utilLowerWeightRange_[length].m_count_ = m_utilUpperWeightRange_[length].m_count_ = 0;
                        }
                        break;
                    }
                }
            }
        }

        // copy the ranges, shortest first, into the result array
        int rangeCount = 0;
        if (m_utilWeightRange_.m_count_ > 0) {
            ranges[0] = new WeightRange(m_utilWeightRange_);
            rangeCount = 1;
        }
        for (int length = 2; length <= 4; ++length) {
            // copy upper first so that later the middle range is more likely
            // the first one to use
            if (m_utilUpperWeightRange_[length].m_count_ > 0) {
                ranges[rangeCount] = new WeightRange(
                        m_utilUpperWeightRange_[length]);
                ++rangeCount;
            }
            if (m_utilLowerWeightRange_[length].m_count_ > 0) {
                ranges[rangeCount] = new WeightRange(
                        m_utilLowerWeightRange_[length]);
                ++rangeCount;
            }
        }
        return rangeCount;
    }

    /**
     * Truncates the weight with length
     * 
     * @param weight
     * @param length
     * @return truncated weight
     */
    private static final int truncateWeight(int weight, int length) {
        return weight & (0xffffffff << ((4 - length) << 3));
    }

    /**
     * Length of the weight
     * 
     * @param weight
     * @return length of the weight
     */
    private static final int lengthOfWeight(int weight) {
        if ((weight & 0xffffff) == 0) {
            return 1;
        } else if ((weight & 0xffff) == 0) {
            return 2;
        } else if ((weight & 0xff) == 0) {
            return 3;
        }
        return 4;
    }

    /**
     * Increment the weight trail
     * 
     * @param weight
     * @param length
     * @return new weight
     */
    private static final int incWeightTrail(int weight, int length) {
        return weight + (1 << ((4 - length) << 3));
    }

    /**
     * Decrement the weight trail
     * 
     * @param weight
     * @param length
     * @return new weight
     */
    private static int decWeightTrail(int weight, int length) {
        return weight - (1 << ((4 - length) << 3));
    }

    /**
     * Gets the codepoint
     * 
     * @param tbl
     *            contraction table
     * @param codePoint
     *            code point to look for
     * @return the offset to the code point
     */
    private static int findCP(BasicContractionTable tbl, char codePoint) {
        int position = 0;
        while (codePoint > tbl.m_codePoints_.charAt(position)) {
            position++;
            if (position > tbl.m_codePoints_.length()) {
                return -1;
            }
        }
        if (codePoint == tbl.m_codePoints_.charAt(position)) {
            return position;
        } else {
            return -1;
        }
    }

    /**
     * Finds a contraction ce
     * 
     * @param table
     * @param element
     * @param ch
     * @return ce
     */
    private static int findCE(ContractionTable table, int element, char ch) {
        if (table == null) {
            return CE_NOT_FOUND_;
        }
        BasicContractionTable tbl = getBasicContractionTable(table, element);
        if (tbl == null) {
            return CE_NOT_FOUND_;
        }
        int position = findCP(tbl, ch);
        if (position > tbl.m_CEs_.size() || position < 0) {
            return CE_NOT_FOUND_;
        }
        return tbl.m_CEs_.get(position).intValue();
    }

    /**
     * Checks if the string is tailored in the contraction
     * 
     * @param table
     *            contraction table
     * @param element
     * @param array
     *            character array to check
     * @param offset
     *            array offset
     * @return true if it is tailored
     */
    private static boolean isTailored(ContractionTable table, int element,
            char array[], int offset) {
        while (array[offset] != 0) {
            element = findCE(table, element, array[offset]);
            if (element == CE_NOT_FOUND_) {
                return false;
            }
            if (!isContractionTableElement(element)) {
                return true;
            }
            offset++;
        }
        if (getCE(table, element, 0) != CE_NOT_FOUND_) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * Assemble RuleBasedCollator
     * 
     * @param t
     *            build table
     * @param collator
     *            to update
     */
    private void assembleTable(BuildTable t, RuleBasedCollator collator) {
        IntTrieBuilder mapping = t.m_mapping_;
        List expansions = t.m_expansions_;
        ContractionTable contractions = t.m_contractions_;
        MaxExpansionTable maxexpansion = t.m_maxExpansions_;

        // contraction offset has to be in since we are building on the
        // UCA contractions
        // int beforeContractions = (HEADER_SIZE_
        // + paddedsize(expansions.size() << 2)) >>> 1;
        collator.m_contractionOffset_ = 0;
        int contractionsSize = constructTable(contractions);

        // the following operation depends on the trie data. Therefore, we have
        // to do it before the trie is compacted
        // sets jamo expansions
        getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_,
                collator.m_isJamoSpecial_);

        // TODO: LATIN1 array is now in the utrie - it should be removed from
        // the calculation
        setAttributes(collator, t.m_options_);
        // copy expansions
        int size = expansions.size();
        collator.m_expansion_ = new int[size];
        for (int i = 0; i < size; i++) {
            collator.m_expansion_[i] = expansions.get(i).intValue();
        }
        // contractions block
        if (contractionsSize != 0) {
            // copy contraction index
            collator.m_contractionIndex_ = new char[contractionsSize];
            contractions.m_codePoints_.getChars(0, contractionsSize,
                    collator.m_contractionIndex_, 0);
            // copy contraction collation elements
            collator.m_contractionCE_ = new int[contractionsSize];
            for (int i = 0; i < contractionsSize; i++) {
                collator.m_contractionCE_[i] = contractions.m_CEs_.get(i).intValue();
            }
        }
        // copy mapping table
        collator.m_trie_ = mapping.serialize(t,
                RuleBasedCollator.DataManipulate.getInstance());
        // copy max expansion table
        // not copying the first element which is a dummy
        // to be in synch with icu4c's builder, we continue to use the
        // expansion offset
        // omitting expansion offset in builder
        collator.m_expansionOffset_ = 0;
        size = maxexpansion.m_endExpansionCE_.size();
        collator.m_expansionEndCE_ = new int[size - 1];
        for (int i = 1; i < size; i++) {
            collator.m_expansionEndCE_[i - 1] = maxexpansion.m_endExpansionCE_
                    .get(i).intValue();
        }
        collator.m_expansionEndCEMaxSize_ = new byte[size - 1];
        for (int i = 1; i < size; i++) {
            collator.m_expansionEndCEMaxSize_[i - 1] = maxexpansion.m_expansionCESize_
                    .get(i).byteValue();
        }
        // Unsafe chars table. Finish it off, then copy it.
        unsafeCPAddCCNZ(t);
        // Or in unsafebits from UCA, making a combined table.
        for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
            t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i];
        }
        collator.m_unsafe_ = t.m_unsafeCP_;

        // Finish building Contraction Ending chars hash table and then copy it
        // out.
        // Or in unsafebits from UCA, making a combined table
        for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
            t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i];
        }
        collator.m_contractionEnd_ = t.m_contrEndCP_;
    }

    /**
     * Sets this collator to use the all options and tables in UCA.
     * 
     * @param collator
     *            which attribute is to be set
     * @param option
     *            to set with
     */
    private static final void setAttributes(RuleBasedCollator collator,
            CollationRuleParser.OptionSet option) {
        collator.latinOneFailed_ = true;
        collator.m_caseFirst_ = option.m_caseFirst_;
        collator.setDecomposition(option.m_decomposition_);
        collator
                .setAlternateHandlingShifted(option.m_isAlternateHandlingShifted_);
        collator.setCaseLevel(option.m_isCaseLevel_);
        collator.setFrenchCollation(option.m_isFrenchCollation_);
        collator.m_isHiragana4_ = option.m_isHiragana4_;
        collator.setStrength(option.m_strength_);
        collator.m_variableTopValue_ = option.m_variableTopValue_;
        collator.m_reorderCodes_ = option.m_scriptOrder_;
        collator.latinOneFailed_ = false;
    }

    /**
     * Constructing the contraction table
     * 
     * @param table
     *            contraction table
     * @return
     */
    private int constructTable(ContractionTable table) {
        // See how much memory we need
        int tsize = table.m_elements_.size();
        if (tsize == 0) {
            return 0;
        }
        table.m_offsets_.clear();
        int position = 0;
        for (int i = 0; i < tsize; i++) {
            table.m_offsets_.add(Integer.valueOf(position));
            position += table.m_elements_.get(i).m_CEs_
                    .size();
        }
        table.m_CEs_.clear();
        table.m_codePoints_.delete(0, table.m_codePoints_.length());
        // Now stuff the things in
        StringBuilder cpPointer = table.m_codePoints_;
        List CEPointer = table.m_CEs_;
        for (int i = 0; i < tsize; i++) {
            BasicContractionTable bct = table.m_elements_.get(i);
            int size = bct.m_CEs_.size();
            char ccMax = 0;
            char ccMin = 255;
            int offset = CEPointer.size();
            CEPointer.add(bct.m_CEs_.get(0));
            for (int j = 1; j < size; j++) {
                char ch = bct.m_codePoints_.charAt(j);
                char cc = (char) (UCharacter.getCombiningClass(ch) & 0xFF);
                if (cc > ccMax) {
                    ccMax = cc;
                }
                if (cc < ccMin) {
                    ccMin = cc;
                }
                cpPointer.append(ch);
                CEPointer.add(bct.m_CEs_.get(j));
            }
            cpPointer.insert(offset,
                    (char) (((ccMin == ccMax) ? 1 : 0 << 8) | ccMax));
            for (int j = 0; j < size; j++) {
                if (isContractionTableElement(CEPointer.get(offset + j).intValue())) {
                    int ce = CEPointer.get(offset + j).intValue();
                    CEPointer.set(offset + j,
                            Integer.valueOf(constructSpecialCE(getCETag(ce),
                                    table.m_offsets_.get(getContractionOffset(ce))
                                    .intValue())));
                }
            }
        }

        for (int i = 0; i <= 0x10FFFF; i++) {
            int CE = table.m_mapping_.getValue(i);
            if (isContractionTableElement(CE)) {
                CE = constructSpecialCE(getCETag(CE),
                        table.m_offsets_.get(getContractionOffset(CE)).intValue());
                table.m_mapping_.setValue(i, CE);
            }
        }
        return position;
    }

    /**
     * Get contraction offset
     * 
     * @param ce
     *            collation element
     * @return contraction offset
     */
    private static final int getContractionOffset(int ce) {
        return ce & 0xFFFFFF;
    }

    /**
     * Gets the maximum Jamo expansion
     * 
     * @param mapping
     *            trie table
     * @param maxexpansion
     *            maximum expansion table
     * @param maxjamoexpansion
     *            maximum jamo expansion table
     * @param jamospecial
     *            is jamo special?
     */
    private static void getMaxExpansionJamo(IntTrieBuilder mapping,
            MaxExpansionTable maxexpansion,
            MaxJamoExpansionTable maxjamoexpansion, boolean jamospecial) {
        int VBASE = 0x1161;
        int TBASE = 0x11A8;
        int VCOUNT = 21;
        int TCOUNT = 28;
        int v = VBASE + VCOUNT - 1;
        int t = TBASE + TCOUNT - 1;

        while (v >= VBASE) {
            int ce = mapping.getValue(v);
            if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
                setMaxExpansion(ce, (byte) 2, maxexpansion);
            }
            v--;
        }

        while (t >= TBASE) {
            int ce = mapping.getValue(t);
            if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
                setMaxExpansion(ce, (byte) 3, maxexpansion);
            }
            t--;
        }
        // According to the docs, 99% of the time, the Jamo will not be special
        if (jamospecial) {
            // gets the max expansion in all unicode characters
            int count = maxjamoexpansion.m_endExpansionCE_.size();
            byte maxTSize = (byte) (maxjamoexpansion.m_maxLSize_
                    + maxjamoexpansion.m_maxVSize_ + maxjamoexpansion.m_maxTSize_);
            byte maxVSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_);

            while (count > 0) {
                count--;
                if ((maxjamoexpansion.m_isV_.get(count))
                        .booleanValue() == true) {
                    setMaxExpansion(
                            (maxjamoexpansion.m_endExpansionCE_
                                    .get(count)).intValue(), maxVSize,
                            maxexpansion);
                } else {
                    setMaxExpansion(
                            (maxjamoexpansion.m_endExpansionCE_
                                    .get(count)).intValue(), maxTSize,
                            maxexpansion);
                }
            }
        }
    }

    /**
     * To the UnsafeCP hash table, add all chars with combining class != 0
     * 
     * @param t
     *            build table
     */
    private final void unsafeCPAddCCNZ(BuildTable t) {
        boolean buildCMTable = (buildCMTabFlag & (t.cmLookup == null));
        char[] cm = null; // combining mark array
        int[] index = new int[256];
        int count = 0;

        if (buildCMTable) {
            cm = new char[0x10000];
        }
        for (char c = 0; c < 0xffff; c++) {
            int fcd;
            if (UTF16.isLeadSurrogate(c)) {
                fcd = 0;
                if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
                    int supp = Character.toCodePoint(c, (char)0xdc00);
                    int suppLimit = supp + 0x400;
                    while (supp < suppLimit) {
                        fcd |= m_nfcImpl_.getFCD16FromNormData(supp++);
                    }
                }
            } else {
                fcd = m_nfcImpl_.getFCD16(c);
            }
            // TODO: review for handling supplementary characters
            if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
                    (UTF16.isLeadSurrogate(c) && fcd != 0)) {
                // c is a leading surrogate with some FCD data
                unsafeCPSet(t.m_unsafeCP_, c);
                if (buildCMTable) {
                    int cc = (fcd & 0xff);
                    int pos = (cc << 8) + index[cc];
                    cm[pos] = c;
                    index[cc]++;
                    count++;
                }
            }
        }

        if (t.m_prefixLookup_ != null) {
            Enumeration els = Collections.enumeration(t.m_prefixLookup_.values());
            while (els.hasMoreElements()) {
                Elements e = els.nextElement();
                // codepoints here are in the NFD form. We need to add the
                // first code point of the NFC form to unsafe, because
                // strcoll needs to backup over them.
                // weiv: This is wrong! See the comment above.
                // String decomp = Normalizer.decompose(e.m_cPoints_, true);
                // unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0));
                // it should be:
                String comp = Normalizer.compose(e.m_cPoints_, false);
                unsafeCPSet(t.m_unsafeCP_, comp.charAt(0));
            }
        }

        if (buildCMTable) {
            t.cmLookup = new CombinClassTable();
            t.cmLookup.generate(cm, count, index);
        }
    }

    /**
     * Create closure
     * 
     * @param t
     *            build table
     * @param collator
     *            RuleBasedCollator
     * @param colEl
     *            collation element iterator
     * @param start
     * @param limit
     * @param type
     *            character type
     * @return
     */
    private boolean enumCategoryRangeClosureCategory(BuildTable t,
            RuleBasedCollator collator, CollationElementIterator colEl,
            int start, int limit, int type) {
        if (type != UCharacterCategory.UNASSIGNED
                && type != UCharacterCategory.PRIVATE_USE) {
            // if the range is assigned - we might ommit more categories later

            for (int u32 = start; u32 < limit; u32++) {
                String decomp = m_nfcImpl_.getDecomposition(u32);
                if (decomp != null) {
                    String comp = UCharacter.toString(u32);
                    if (!collator.equals(comp, decomp)) {
                        m_utilElement_.m_cPoints_ = decomp;
                        m_utilElement_.m_prefix_ = 0;
                        Elements prefix = t.m_prefixLookup_.get(m_utilElement_);
                        if (prefix == null) {
                            m_utilElement_.m_cPoints_ = comp;
                            m_utilElement_.m_prefix_ = 0;
                            m_utilElement_.m_prefixChars_ = null;
                            colEl.setText(decomp);
                            int ce = colEl.next();
                            m_utilElement_.m_CELength_ = 0;
                            while (ce != CollationElementIterator.NULLORDER) {
                                m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce;
                                ce = colEl.next();
                            }
                        } else {
                            m_utilElement_.m_cPoints_ = comp;
                            m_utilElement_.m_prefix_ = 0;
                            m_utilElement_.m_prefixChars_ = null;
                            m_utilElement_.m_CELength_ = 1;
                            m_utilElement_.m_CEs_[0] = prefix.m_mapCE_;
                            // This character uses a prefix. We have to add it
                            // to the unsafe table, as it decomposed form is
                            // already in. In Japanese, this happens for \u309e
                            // & \u30fe
                            // Since unsafeCPSet is static in ucol_elm, we are
                            // going to wrap it up in the unsafeCPAddCCNZ
                            // function
                        }
                        addAnElement(t, m_utilElement_);
                    }
                }
            }
        }
        return true;
    }

    /**
     * Determine if a character is a Jamo
     * 
     * @param ch
     *            character to test
     * @return true if ch is a Jamo, false otherwise
     */
    private static final boolean isJamo(char ch) {
        return (ch >= 0x1100 && ch <= 0x1112) || (ch >= 0x1175 && ch <= 0x1161)
                || (ch >= 0x11A8 && ch <= 0x11C2);
    }

    /**
     * Produces canonical closure
     */
    private void canonicalClosure(BuildTable t) {
        BuildTable temp = new BuildTable(t);
        assembleTable(temp, temp.m_collator_);
        // produce canonical closure
        CollationElementIterator coleiter = temp.m_collator_
                .getCollationElementIterator("");
        RangeValueIterator typeiter = UCharacter.getTypeIterator();
        RangeValueIterator.Element element = new RangeValueIterator.Element();
        while (typeiter.next(element)) {
            enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter,
                    element.start, element.limit, element.value);
        }

        t.cmLookup = temp.cmLookup;
        temp.cmLookup = null;

        for (int i = 0; i < m_parser_.m_resultLength_; i++) {
            char baseChar, firstCM;
            // now we need to generate the CEs
            // We stuff the initial value in the buffers, and increase the
            // appropriate buffer according to strength */
            // createElements(t, m_parser_.m_listHeader_[i]);
            CollationRuleParser.Token tok = m_parser_.m_listHeader_[i].m_first_;
            m_utilElement_.clear();
            while (tok != null) {
                m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
                m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
                if (tok.m_prefix_ != 0) {
                    // we will just copy the prefix here, and adjust accordingly
                    // in
                    // the addPrefix function in ucol_elm. The reason is that we
                    // need to add both composed AND decomposed elements to the
                    // unsafe table.
                    int size = tok.m_prefix_ >> 24;
                    int offset = tok.m_prefix_ & 0x00FFFFFF;
                    m_utilElement_.m_prefixChars_ = m_parser_.m_source_
                            .substring(offset, offset + size);
                    size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
                    offset = (tok.m_source_ & 0x00FFFFFF)
                            + (tok.m_prefix_ >> 24);
                    m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
                            offset, offset + size);
                } else {
                    m_utilElement_.m_prefixChars_ = null;
                    int offset = tok.m_source_ & 0x00FFFFFF;
                    int size = tok.m_source_ >>> 24;
                    m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
                            offset, offset + size);
                }
                m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;

                baseChar = firstCM = 0; // reset
                for (int j = 0; j < m_utilElement_.m_cPoints_.length()
                        - m_utilElement_.m_cPointsOffset_; j++) {

                    int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(j));  // TODO: review for handling supplementary characters
                    if ((fcd & 0xff) == 0) {
                        baseChar = m_utilElement_.m_cPoints_.charAt(j);
                    } else {
                        if ((baseChar != 0) && (firstCM == 0)) {
                            firstCM = m_utilElement_.m_cPoints_.charAt(j); // first
                                                                           // combining
                                                                           // mark
                        }
                    }
                }

                if ((baseChar != 0) && (firstCM != 0)) {
                    addTailCanonicalClosures(t, temp.m_collator_, coleiter,
                            baseChar, firstCM);
                }
                tok = tok.m_next_;
            }
        }
    }

    private void addTailCanonicalClosures(BuildTable t,
            RuleBasedCollator m_collator, CollationElementIterator colEl,
            char baseChar, char cMark) {
        if (t.cmLookup == null) {
            return;
        }
        CombinClassTable cmLookup = t.cmLookup;
        int[] index = cmLookup.index;
        int cClass = m_nfcImpl_.getFCD16(cMark) & 0xff;  // TODO: review for handling supplementary characters
        int maxIndex = 0;
        char[] precompCh = new char[256];
        int[] precompClass = new int[256];
        int precompLen = 0;
        Elements element = new Elements();

        if (cClass > 0) {
            maxIndex = index[cClass - 1];
        }
        for (int i = 0; i < maxIndex; i++) {
            StringBuilder decompBuf = new StringBuilder();
            decompBuf.append(baseChar).append(cmLookup.cPoints[i]);
            String comp = Normalizer.compose(decompBuf.toString(), false);
            if (comp.length() == 1) {
                precompCh[precompLen] = comp.charAt(0);
                precompClass[precompLen] = m_nfcImpl_.getFCD16(cmLookup.cPoints[i]) & 0xff;  // TODO: review for handling supplementary characters
                precompLen++;
                StringBuilder decomp = new StringBuilder();
                for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {
                    if (m_utilElement_.m_cPoints_.charAt(j) == cMark) {
                        decomp.append(cmLookup.cPoints[i]);
                    } else {
                        decomp.append(m_utilElement_.m_cPoints_.charAt(j));
                    }
                }
                comp = Normalizer.compose(decomp.toString(), false);
                StringBuilder buf = new StringBuilder(comp);
                buf.append(cMark);
                decomp.append(cMark);
                comp = buf.toString();

                element.m_cPoints_ = decomp.toString();
                element.m_CELength_ = 0;
                element.m_prefix_ = 0;
                Elements prefix = t.m_prefixLookup_.get(element);
                element.m_cPoints_ = comp;
                element.m_uchars_ = comp;

                if (prefix == null) {
                    element.m_prefix_ = 0;
                    element.m_prefixChars_ = null;
                    colEl.setText(decomp.toString());
                    int ce = colEl.next();
                    element.m_CELength_ = 0;
                    while (ce != CollationElementIterator.NULLORDER) {
                        element.m_CEs_[element.m_CELength_++] = ce;
                        ce = colEl.next();
                    }
                } else {
                    element.m_cPoints_ = comp;
                    element.m_prefix_ = 0;
                    element.m_prefixChars_ = null;
                    element.m_CELength_ = 1;
                    element.m_CEs_[0] = prefix.m_mapCE_;
                }
                setMapCE(t, element);
                finalizeAddition(t, element);

                if (comp.length() > 2) {
                    // This is a fix for tailoring contractions with accented
                    // character at the end of contraction string.
                    addFCD4AccentedContractions(t, colEl, comp, element);
                }
                if (precompLen > 1) {
                    precompLen = addMultiCMontractions(t, colEl, element,
                            precompCh, precompClass, precompLen, cMark, i,
                            decomp.toString());
                }
            }
        }

    }

    private void setMapCE(BuildTable t, Elements element) {
        List expansions = t.m_expansions_;
        element.m_mapCE_ = 0;

        if (element.m_CELength_ == 2 // a two CE expansion
                && RuleBasedCollator.isContinuation(element.m_CEs_[1])
                && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that
                                                                                                          // has
                                                                                                          // only
                                                                                                          // primaries
                                                                                                          // in
                                                                                                          // continuation
                && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
                // a common secondary
                && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_)) { // and
                                                                                     // a
                                                                                     // common
                                                                                     // tertiary

            element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
            // a long primary special
                    | (CE_LONG_PRIMARY_TAG_ << 24)
                    // first and second byte of primary
                    | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
                    // third byte of primary
                    | ((element.m_CEs_[1] >> 24) & 0xFF);
        } else {
            // omitting expansion offset in builder
            // (HEADER_SIZE_ >> 2)
            int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
                    | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
                    | (addExpansion(expansions, element.m_CEs_[0]) << 4)
                    & 0xFFFFF0;

            for (int i = 1; i < element.m_CELength_; i++) {
                addExpansion(expansions, element.m_CEs_[i]);
            }
            if (element.m_CELength_ <= 0xF) {
                expansion |= element.m_CELength_;
            } else {
                addExpansion(expansions, 0);
            }
            element.m_mapCE_ = expansion;
            setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
                    (byte) element.m_CELength_, t.m_maxExpansions_);
        }
    }

    private int addMultiCMontractions(BuildTable t,
            CollationElementIterator colEl, Elements element, char[] precompCh,
            int[] precompClass, int maxComp, char cMark, int cmPos,
            String decomp) {

        CombinClassTable cmLookup = t.cmLookup;
        char[] combiningMarks = { cMark };
        int cMarkClass = UCharacter.getCombiningClass(cMark) & 0xFF;
        String comMark = new String(combiningMarks);
        int noOfPrecomposedChs = maxComp;

        for (int j = 0; j < maxComp; j++) {
            int count = 0;
            StringBuilder temp;

            do {
                String newDecomp, comp;

                if (count == 0) { // Decompose the saved precomposed char.
                    newDecomp = Normalizer.decompose(
                            new String(precompCh, j, 1), false);
                    temp = new StringBuilder(newDecomp);
                    temp.append(cmLookup.cPoints[cmPos]);
                    newDecomp = temp.toString();
                } else {
                    temp = new StringBuilder(decomp);
                    temp.append(precompCh[j]);
                    newDecomp = temp.toString();
                }
                comp = Normalizer.compose(newDecomp, false);
                if (comp.length() == 1) {
                    temp.append(cMark);
                    element.m_cPoints_ = temp.toString();
                    element.m_CELength_ = 0;
                    element.m_prefix_ = 0;
                    Elements prefix = t.m_prefixLookup_.get(element);
                    element.m_cPoints_ = comp + comMark;
                    if (prefix == null) {
                        element.m_prefix_ = 0;
                        element.m_prefixChars_ = null;
                        colEl.setText(temp.toString());
                        int ce = colEl.next();
                        element.m_CELength_ = 0;
                        while (ce != CollationElementIterator.NULLORDER) {
                            element.m_CEs_[element.m_CELength_++] = ce;
                            ce = colEl.next();
                        }
                    } else {
                        element.m_cPoints_ = comp;
                        element.m_prefix_ = 0;
                        element.m_prefixChars_ = null;
                        element.m_CELength_ = 1;
                        element.m_CEs_[0] = prefix.m_mapCE_;
                    }
                    setMapCE(t, element);
                    finalizeAddition(t, element);
                    precompCh[noOfPrecomposedChs] = comp.charAt(0);
                    precompClass[noOfPrecomposedChs] = cMarkClass;
                    noOfPrecomposedChs++;
                }
            } while (++count < 2 && (precompClass[j] == cMarkClass));
        }
        return noOfPrecomposedChs;
    }

    private void addFCD4AccentedContractions(BuildTable t,
            CollationElementIterator colEl, String data, Elements element) {
        String decomp = Normalizer.decompose(data, false);
        String comp = Normalizer.compose(data, false);

        element.m_cPoints_ = decomp;
        element.m_CELength_ = 0;
        element.m_prefix_ = 0;
        Elements prefix = t.m_prefixLookup_.get(element);
        if (prefix == null) {
            element.m_cPoints_ = comp;
            element.m_prefix_ = 0;
            element.m_prefixChars_ = null;
            element.m_CELength_ = 0;
            colEl.setText(decomp);
            int ce = colEl.next();
            element.m_CELength_ = 0;
            while (ce != CollationElementIterator.NULLORDER) {
                element.m_CEs_[element.m_CELength_++] = ce;
                ce = colEl.next();
            }
            addAnElement(t, element);
        }
    }

    private void processUCACompleteIgnorables(BuildTable t) {
        TrieIterator trieiterator = new TrieIterator(
                RuleBasedCollator.UCA_.m_trie_);
        RangeValueIterator.Element element = new RangeValueIterator.Element();
        while (trieiterator.next(element)) {
            int start = element.start;
            int limit = element.limit;
            if (element.value == 0) {
                while (start < limit) {
                    int CE = t.m_mapping_.getValue(start);
                    if (CE == CE_NOT_FOUND_) {
                        m_utilElement_.m_prefix_ = 0;
                        m_utilElement_.m_uchars_ = UCharacter.toString(start);
                        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
                        m_utilElement_.m_cPointsOffset_ = 0;
                        m_utilElement_.m_CELength_ = 1;
                        m_utilElement_.m_CEs_[0] = 0;
                        addAnElement(t, m_utilElement_);
                    }
                    start++;
                }
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy