com.ibm.icu.text.CollationRuleParser Maven / Gradle / Ivy

Go to download
/**
*******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/
package com.ibm.icu.text;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Collator.ReorderCodes;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;

/**
* Class for parsing collation rules, produces a list of tokens that will be
* turned into collation elements
* @author Syn Wee Quek
* @since release 2.2, June 7 2002
*/
final class CollationRuleParser
{
    // public data members ---------------------------------------------------

    // package private constructors ------------------------------------------

    /**
     * RuleBasedCollator constructor that takes the rules.
     * Please see RuleBasedCollator class description for more details on the
     * collation rule syntax.
     * @see java.util.Locale
     * @param rules the collation rules to build the collation table from.
     * @exception ParseException thrown when argument rules have an invalid
     *            syntax.
     */
    CollationRuleParser(String rules) throws ParseException
    {
        // Prepares m_copySet_ and m_removeSet_.
        rules = preprocessRules(rules);

        // Save the rules as a long string.  The StringBuilder object is
        // used to store the result of token parsing as well.
        m_source_ = new StringBuilder(Normalizer.decompose(rules, false).trim());
        m_rules_ = m_source_.toString();

        // Index of the next unparsed character.
        m_current_ = 0;

        // Index of the next unwritten character in the parsed result.
        m_extraCurrent_ = m_source_.length();

        m_variableTop_ = null;
        m_parsedToken_ = new ParsedToken();
        m_hashTable_ = new HashMap();
        m_options_ = new OptionSet(RuleBasedCollator.UCA_);
        m_listHeader_ = new TokenListHeader[512];
        m_resultLength_ = 0;
        // call assembleTokenList() manually, so that we can
        // init a parser and manually parse tokens
        //assembleTokenList();
    }

    // package private inner classes -----------------------------------------

    /**
     * Collation options set
     */
    static class OptionSet
    {
        // package private constructor ---------------------------------------

        /**
         * Initializes the option set with the argument collators
         * @param collator option to use
         */
        OptionSet(RuleBasedCollator collator)
        {
            m_variableTopValue_ = collator.m_variableTopValue_;
            m_isFrenchCollation_ = collator.isFrenchCollation();
            m_isAlternateHandlingShifted_
                                   = collator.isAlternateHandlingShifted();
            m_caseFirst_ = collator.m_caseFirst_;
            m_isCaseLevel_ = collator.isCaseLevel();
            m_decomposition_ = collator.getDecomposition();
            m_strength_ = collator.getStrength();
            m_isHiragana4_ = collator.m_isHiragana4_;

            if(collator.m_reorderCodes_ != null){ 
                m_scriptOrder_ = new int[collator.m_reorderCodes_.length]; 
                for(int i = 0; i < m_scriptOrder_.length; i++){ 
                    m_scriptOrder_[i] = collator.m_reorderCodes_[i]; 
                } 
            } 

        }

        // package private data members --------------------------------------

        int m_variableTopValue_;
        boolean m_isFrenchCollation_;
        /**
         * Attribute for handling variable elements
         */
        boolean m_isAlternateHandlingShifted_;
        /**
         * who goes first, lower case or uppercase
         */
        int m_caseFirst_;
        /**
         * do we have an extra case level
         */
        boolean m_isCaseLevel_;
        /**
         * attribute for normalization
         */
        int m_decomposition_;
        /**
         * attribute for strength
         */
        int m_strength_;
        /**
         * attribute for special Hiragana
         */
        boolean m_isHiragana4_;
        
        /** 
         * the ordering of the scripts 
         */ 
        int[] m_scriptOrder_;   
    }

    /**
     * List of tokens used by the collation rules
     */
    static class TokenListHeader
    {
        Token m_first_;
        Token m_last_;
        Token m_reset_;
        boolean m_indirect_;
        int m_baseCE_;
        int m_baseContCE_;
        int m_nextCE_;
        int m_nextContCE_;
        int m_previousCE_;
        int m_previousContCE_;
        int m_pos_[] = new int[Collator.IDENTICAL + 1];
        int m_gapsLo_[] = new int[3 * (Collator.TERTIARY + 1)];
        int m_gapsHi_[] = new int[3 * (Collator.TERTIARY + 1)];
        int m_numStr_[] = new int[3 * (Collator.TERTIARY + 1)];
        Token m_fStrToken_[] = new Token[Collator.TERTIARY + 1];
        Token m_lStrToken_[] = new Token[Collator.TERTIARY + 1];
    }

    /**
     * Token wrapper for collation rules
     */
    static class Token
    {
       // package private data members ---------------------------------------

       int m_CE_[];
       int m_CELength_;
       int m_expCE_[];
       int m_expCELength_;
       int m_source_;
       int m_expansion_;
       int m_prefix_;
       int m_strength_;
       int m_toInsert_;
       int m_polarity_; // 1 for <, <<, <<<, , ; and 0 for >, >>, >>>
       TokenListHeader m_listHeader_;
       Token m_previous_;
       Token m_next_;
       StringBuilder m_rules_;
       char m_flags_;

       // package private constructors ---------------------------------------

       Token()
       {
           m_CE_ = new int[128];
           m_expCE_ = new int[128];
           // TODO: this should also handle reverse
           m_polarity_ = TOKEN_POLARITY_POSITIVE_;
           m_next_ = null;
           m_previous_ = null;
           m_CELength_ = 0;
           m_expCELength_ = 0;
       }

       // package private methods --------------------------------------------

       /**
        * Hashcode calculation for token
        * @return the hashcode
        */
       public int hashCode()
       {
           int result = 0;
           int len = (m_source_ & 0xFF000000) >>> 24;
           int inc = ((len - 32) / 32) + 1;

           int start = m_source_ & 0x00FFFFFF;
           int limit = start + len;

           while (start < limit) {
               result = (result * 37) + m_rules_.charAt(start);
               start += inc;
           }
           return result;
       }

       /**
        * Equals calculation
        * @param target object to compare
        * @return true if target is the same as this object
        */
       public boolean equals(Object target)
       {
           if (target == this) {
               return true;
           }
           if (target instanceof Token) {
               Token t = (Token)target;
               int sstart = m_source_ & 0x00FFFFFF;
               int tstart = t.m_source_ & 0x00FFFFFF;
               int slimit = (m_source_ & 0xFF000000) >> 24;
               int tlimit = (m_source_ & 0xFF000000) >> 24;

               int end = sstart + slimit - 1;

               if (m_source_ == 0 || t.m_source_ == 0) {
                   return false;
               }
               if (slimit != tlimit) {
                   return false;
               }
               if (m_source_ == t.m_source_) {
                   return true;
               }

               while (sstart < end
                      && m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart))
               {
                   ++ sstart;
                   ++ tstart;
               }
               if (m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) {
                   return true;
               }
           }
           return false;
        }
    }

    // package private data member -------------------------------------------

    /**
     * Indicator that the token is resetted yet, ie & in the rules
     */
    static final int TOKEN_RESET_ = 0xDEADBEEF;

    /**
     * Size of the number of tokens
     */
    int m_resultLength_;
    /**
     * List of parsed tokens
     */
    TokenListHeader m_listHeader_[];
    /**
     * Variable top token
     */
    Token m_variableTop_;
    /**
     * Collation options
     */
    OptionSet m_options_;
    /**
     * Normalized collation rules with some extra characters
     */
    StringBuilder m_source_;
    /**
     * Hash table to keep all tokens
     */
    Map m_hashTable_;

    // package private method ------------------------------------------------

    void setDefaultOptionsInCollator(RuleBasedCollator collator)
    {
        collator.m_defaultStrength_ = m_options_.m_strength_;
        collator.m_defaultDecomposition_ = m_options_.m_decomposition_;
        collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_;
        collator.m_defaultIsAlternateHandlingShifted_
                                    = m_options_.m_isAlternateHandlingShifted_;
        collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_;
        collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_;
        collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_;
        collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_;
        if(m_options_.m_scriptOrder_ != null) { 
            collator.m_defaultReorderCodes_ = m_options_.m_scriptOrder_.clone(); 
        } else { 
            collator.m_defaultReorderCodes_ = null; 
        }  
    }

    // private inner classes -------------------------------------------------

    /**
     * This is a token that has been parsed but not yet processed. Used to
     * reduce the number of arguments in the parser
     */
    private static class ParsedToken
    {
        // private constructor ----------------------------------------------

        /**
         * Empty constructor
         */
        ParsedToken()
        {
            m_charsLen_ = 0;
            m_charsOffset_ = 0;
            m_extensionLen_ = 0;
            m_extensionOffset_ = 0;
            m_prefixLen_ = 0;
            m_prefixOffset_ = 0;
            m_flags_ = 0;
            m_strength_ = TOKEN_UNSET_;
        }

        // private data members ---------------------------------------------

        int m_strength_;
        int m_charsOffset_;
        int m_charsLen_;
        int m_extensionOffset_;
        int m_extensionLen_;
        int m_prefixOffset_;
        int m_prefixLen_;
        char m_flags_;
        char m_indirectIndex_;
    }

    /**
     * Boundary wrappers
     */
    private static class IndirectBoundaries
    {
        // package private constructor ---------------------------------------

        IndirectBoundaries(int startce[], int limitce[])
        {
            // Set values for the top - TODO: once we have values for all the
            // indirects, we are going to initalize here.
            m_startCE_ = startce[0];
            m_startContCE_ = startce[1];
            if (limitce != null) {
                m_limitCE_ = limitce[0];
                m_limitContCE_ = limitce[1];
            }
            else {
                m_limitCE_ = 0;
                m_limitContCE_ = 0;
            }
        }

        // package private data members --------------------------------------

        int m_startCE_;
        int m_startContCE_;
        int m_limitCE_;
        int m_limitContCE_;
    }

    /**
     * Collation option rule tag
     */
    private static class TokenOption
    {
        // package private constructor ---------------------------------------

        TokenOption(String name, int attribute, String suboptions[],
                    int suboptionattributevalue[])
        {
            m_name_ = name;
            m_attribute_ = attribute;
            m_subOptions_ = suboptions;
            m_subOptionAttributeValues_ = suboptionattributevalue;
        }

        // package private data member ---------------------------------------

        private String m_name_;
        private int m_attribute_;
        private String m_subOptions_[];
        private int m_subOptionAttributeValues_[];
    }

    // private variables -----------------------------------------------------

    /**
     * Current parsed token
     */
    private ParsedToken m_parsedToken_;
    /**
     * Collation rule
     */
    private String m_rules_;
    private int m_current_;
    /**
     * End of the option while reading.
     * Need it for UnicodeSet reading support.
     */
    private int m_optionEnd_;
    /*
     * Current offset in m_source
     */
    //private int m_sourceLimit_;
    /**
     * Offset to m_source_ ofr the extra expansion characters
     */
    private int m_extraCurrent_;

    /**
     * UnicodeSet that contains code points to be copied from the UCA
     */
    UnicodeSet m_copySet_;

    /**
     * UnicodeSet that contains code points for which we want to remove
     * UCA contractions. It implies copying of these code points from
     * the UCA.
     */
    UnicodeSet m_removeSet_;

    /*
     * This is space for the extra strings that need to be unquoted during the
     * parsing of the rules
     */
    //private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048;
    /**
     * Indicator that the token is not set yet
     */
    private static final int TOKEN_UNSET_ = 0xFFFFFFFF;
    /*
     * Indicator that the rule is in the > polarity, ie everything on the
     * right of the rule is less than
     */
    //private static final int TOKEN_POLARITY_NEGATIVE_ = 0;
    /**
     * Indicator that the rule is in the < polarity, ie everything on the
     * right of the rule is greater than
     */
    private static final int TOKEN_POLARITY_POSITIVE_ = 1;
    /**
     * Flag mask to determine if top is set
     */
    private static final int TOKEN_TOP_MASK_ = 0x04;
    /**
     * Flag mask to determine if variable top is set
     */
    private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08;
    /**
     * Flag mask to determine if a before attribute is set
     */
    private static final int TOKEN_BEFORE_ = 0x03;
    /**
     * For use in parsing token options
     */
    private static final int TOKEN_SUCCESS_MASK_ = 0x10;

    /**
     * These values are used for finding CE values for indirect positioning.
     * Indirect positioning is a mechanism for allowing resets on symbolic
     * values. It only works for resets and you cannot tailor indirect names.
     * An indirect name can define either an anchor point or a range. An anchor
     * point behaves in exactly the same way as a code point in reset would,
     * except that it cannot be tailored. A range (we currently only know for
     * the [top] range will explicitly set the upper bound for generated CEs,
     * thus allowing for better control over how many CEs can be squeezed
     * between in the range without performance penalty. In that respect, we use
     * [top] for tailoring of locales that use CJK characters. Other indirect
     * values are currently a pure convenience, they can be used to assure that
     * the CEs will be always positioned in the same place relative to a point
     * with known properties (e.g. first primary ignorable).
     */
    private static final IndirectBoundaries INDIRECT_BOUNDARIES_[];

//    /**
//     * Inverse UCA constants
//     */
//    private static final int INVERSE_SIZE_MASK_ = 0xFFF00000;
//    private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF;
//    private static final int INVERSE_SHIFT_VALUE_ = 20;

    /**
     * Collation option tags
     * [last variable] last variable value
     * [last primary ignorable] largest CE for primary ignorable
     * [last secondary ignorable] largest CE for secondary ignorable
     * [last tertiary ignorable] largest CE for tertiary ignorable
     * [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
     */
    private static final TokenOption RULES_OPTIONS_[];

    static
    {
        INDIRECT_BOUNDARIES_ = new IndirectBoundaries[15];
        // UCOL_RESET_TOP_VALUE
        INDIRECT_BOUNDARIES_[0] = new IndirectBoundaries(
                        RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
                        RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
        // UCOL_FIRST_PRIMARY_IGNORABLE
        INDIRECT_BOUNDARIES_[1] = new IndirectBoundaries(
                    RuleBasedCollator.UCA_CONSTANTS_.FIRST_PRIMARY_IGNORABLE_,
                    null);
        // UCOL_LAST_PRIMARY_IGNORABLE
        INDIRECT_BOUNDARIES_[2] = new IndirectBoundaries(
                    RuleBasedCollator.UCA_CONSTANTS_.LAST_PRIMARY_IGNORABLE_,
                    null);

        // UCOL_FIRST_SECONDARY_IGNORABLE
        INDIRECT_BOUNDARIES_[3] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_SECONDARY_IGNORABLE_,
                   null);
        // UCOL_LAST_SECONDARY_IGNORABLE
        INDIRECT_BOUNDARIES_[4] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_SECONDARY_IGNORABLE_,
                   null);
        // UCOL_FIRST_TERTIARY_IGNORABLE
        INDIRECT_BOUNDARIES_[5] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_TERTIARY_IGNORABLE_,
                   null);
        // UCOL_LAST_TERTIARY_IGNORABLE
        INDIRECT_BOUNDARIES_[6] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_TERTIARY_IGNORABLE_,
                   null);
        // UCOL_FIRST_VARIABLE;
        INDIRECT_BOUNDARIES_[7] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_VARIABLE_,
                   null);
        // UCOL_LAST_VARIABLE
        INDIRECT_BOUNDARIES_[8] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_VARIABLE_,
                   null);
        // UCOL_FIRST_NON_VARIABLE
        INDIRECT_BOUNDARIES_[9] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_NON_VARIABLE_,
                   null);
        // UCOL_LAST_NON_VARIABLE
        INDIRECT_BOUNDARIES_[10] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
        // UCOL_FIRST_IMPLICIT
        INDIRECT_BOUNDARIES_[11] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_,
                   null);
        // UCOL_LAST_IMPLICIT
        INDIRECT_BOUNDARIES_[12] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_IMPLICIT_,
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_);
        // UCOL_FIRST_TRAILING
        INDIRECT_BOUNDARIES_[13] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_,
                   null);
        // UCOL_LAST_TRAILING
        INDIRECT_BOUNDARIES_[14] = new IndirectBoundaries(
                   RuleBasedCollator.UCA_CONSTANTS_.LAST_TRAILING_,
                   null);
        INDIRECT_BOUNDARIES_[14].m_limitCE_
                 = RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24;

        RULES_OPTIONS_ = new TokenOption[20];
        String option[] = {"non-ignorable", "shifted"};
        int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_,
                       RuleBasedCollator.AttributeValue.SHIFTED_};
        RULES_OPTIONS_[0] = new TokenOption("alternate",
                              RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
                              option, value);
        option = new String[1];
        option[0] = "2";
        value = new int[1];
        value[0] = RuleBasedCollator.AttributeValue.ON_;
        RULES_OPTIONS_[1] = new TokenOption("backwards",
                                 RuleBasedCollator.Attribute.FRENCH_COLLATION_,
                                 option, value);
        String offonoption[] = new String[2];
        offonoption[0] = "off";
        offonoption[1] = "on";
        int offonvalue[] = new int[2];
        offonvalue[0] = RuleBasedCollator.AttributeValue.OFF_;
        offonvalue[1] = RuleBasedCollator.AttributeValue.ON_;
        RULES_OPTIONS_[2] = new TokenOption("caseLevel",
                                       RuleBasedCollator.Attribute.CASE_LEVEL_,
                                       offonoption, offonvalue);
        option = new String[3];
        option[0] = "lower";
        option[1] = "upper";
        option[2] = "off";
        value = new int[3];
        value[0] = RuleBasedCollator.AttributeValue.LOWER_FIRST_;
        value[1] = RuleBasedCollator.AttributeValue.UPPER_FIRST_;
        value[2] = RuleBasedCollator.AttributeValue.OFF_;
        RULES_OPTIONS_[3] = new TokenOption("caseFirst",
                                       RuleBasedCollator.Attribute.CASE_FIRST_,
                                       option, value);
        RULES_OPTIONS_[4] = new TokenOption("normalization",
                               RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
                               offonoption, offonvalue);
        RULES_OPTIONS_[5] = new TokenOption("hiraganaQ",
                         RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
                         offonoption, offonvalue);
        option = new String[5];
        option[0] = "1";
        option[1] = "2";
        option[2] = "3";
        option[3] = "4";
        option[4] = "I";
        value = new int[5];
        value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
        value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
        value[2] = RuleBasedCollator.AttributeValue.TERTIARY_;
        value[3] = RuleBasedCollator.AttributeValue.QUATERNARY_;
        value[4] = RuleBasedCollator.AttributeValue.IDENTICAL_;
        RULES_OPTIONS_[6] = new TokenOption("strength",
                                         RuleBasedCollator.Attribute.STRENGTH_,
                                         option, value);
        RULES_OPTIONS_[7] = new TokenOption("variable top",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[8] = new TokenOption("rearrange",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        option = new String[3];
        option[0] = "1";
        option[1] = "2";
        option[2] = "3";
        value = new int[3];
        value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
        value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
        value[2] = RuleBasedCollator.AttributeValue.TERTIARY_;
        RULES_OPTIONS_[9] = new TokenOption("before",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  option, value);
        RULES_OPTIONS_[10] = new TokenOption("top",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        String firstlastoption[] = new String[7];
        firstlastoption[0] = "primary";
        firstlastoption[1] = "secondary";
        firstlastoption[2] = "tertiary";
        firstlastoption[3] = "variable";
        firstlastoption[4] = "regular";
        firstlastoption[5] = "implicit";
        firstlastoption[6] = "trailing";

        int firstlastvalue[] = new int[7];
        Arrays.fill(firstlastvalue, RuleBasedCollator.AttributeValue.PRIMARY_);

        RULES_OPTIONS_[11] = new TokenOption("first",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  firstlastoption, firstlastvalue);
        RULES_OPTIONS_[12] = new TokenOption("last",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  firstlastoption, firstlastvalue);
        RULES_OPTIONS_[13] = new TokenOption("optimize",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[14] = new TokenOption("suppressContractions",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[15] = new TokenOption("undefined",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[16] = new TokenOption("reorder",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[17] = new TokenOption("charsetname",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[18] = new TokenOption("charset",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
        RULES_OPTIONS_[19] = new TokenOption("import",
                                  RuleBasedCollator.Attribute.LIMIT_,
                                  null, null);
    }

    /**
     * Utility data members
     */
    private Token m_utilToken_ = new Token();
    private CollationElementIterator m_UCAColEIter_
                      = RuleBasedCollator.UCA_.getCollationElementIterator("");
    private int m_utilCEBuffer_[] = new int[2];

    private boolean m_isStarred_;

    private int m_currentStarredCharIndex_;


    private int m_lastStarredCharIndex_;

    private int m_currentRangeCp_;

    private int m_lastRangeCp_;

    private boolean m_inRange_;

    private int m_previousCp_;

    private boolean m_savedIsStarred_;


    // private methods -------------------------------------------------------

    /**
     * Assembles the token list
     * @exception ParseException thrown when rules syntax fails
     */
    int assembleTokenList() throws ParseException
    {
        Token lastToken = null;
        m_parsedToken_.m_strength_ = TOKEN_UNSET_;
        int sourcelimit = m_source_.length();
        int expandNext = 0;

        m_isStarred_ = false;

        while (m_current_ < sourcelimit || m_isStarred_) {
            m_parsedToken_.m_prefixOffset_ = 0;
            if (parseNextToken(lastToken == null) < 0) {
                // we have reached the end
                continue;
            }
            char specs = m_parsedToken_.m_flags_;
            boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0);
            boolean top = ((specs & TOKEN_TOP_MASK_) != 0);
            int lastStrength = TOKEN_UNSET_;
            if (lastToken != null) {
                lastStrength = lastToken.m_strength_;
            }
            m_utilToken_.m_source_ = m_parsedToken_.m_charsLen_ << 24
                                             | m_parsedToken_.m_charsOffset_;
            m_utilToken_.m_rules_ = m_source_;
            // 4 Lookup each source in the CharsToToken map, and find a
            // sourcetoken
            Token sourceToken = m_hashTable_.get(m_utilToken_);
            if (m_parsedToken_.m_strength_ != TOKEN_RESET_) {
                if (lastToken == null) {
                    // this means that rules haven't started properly
                    throwParseException(m_source_.toString(), 0);
                }
                //  6 Otherwise (when relation != reset)
                if (sourceToken == null) {
                    // If sourceToken is null, create new one
                    sourceToken = new Token();
                     sourceToken.m_rules_ = m_source_;
                    sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
                                           | m_parsedToken_.m_charsOffset_;
                    sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24
                                           | m_parsedToken_.m_prefixOffset_;
                    // TODO: this should also handle reverse
                    sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
                    sourceToken.m_next_ = null;
                     sourceToken.m_previous_ = null;
                    sourceToken.m_CELength_ = 0;
                    sourceToken.m_expCELength_ = 0;
                    m_hashTable_.put(sourceToken, sourceToken);
                }
                else {
                    // we could have fished out a reset here
                    if (sourceToken.m_strength_ != TOKEN_RESET_
                        && lastToken != sourceToken) {
                        // otherwise remove sourceToken from where it was.

                        // Take care of the next node
                        if (sourceToken.m_next_ != null) {
                            if (sourceToken.m_next_.m_strength_
                                                   > sourceToken.m_strength_) {
                                sourceToken.m_next_.m_strength_
                                                   = sourceToken.m_strength_;
                            }
                            sourceToken.m_next_.m_previous_
                                                    = sourceToken.m_previous_;
                        }
                        else {
                            // sourcetoken is the last token.
                            // Redefine the tail token.
                            sourceToken.m_listHeader_.m_last_
                                                    = sourceToken.m_previous_;
                        }

                        // Take care of the previous node.
                        if (sourceToken.m_previous_ != null) {
                            sourceToken.m_previous_.m_next_
                                                        = sourceToken.m_next_;
                        }
                        else {
                            // sourcetoken is the first token.
                            // Redefine the head node.
                            sourceToken.m_listHeader_.m_first_
                                                        = sourceToken.m_next_;
                        }
                        sourceToken.m_next_ = null;
                        sourceToken.m_previous_ = null;
                    }
                }
                sourceToken.m_strength_ = m_parsedToken_.m_strength_;
                sourceToken.m_listHeader_ = lastToken.m_listHeader_;

                // 1.  Find the strongest strength in each list, and set
                // strongestP and strongestN accordingly in the headers.
                if (lastStrength == TOKEN_RESET_
                    || sourceToken.m_listHeader_.m_first_ == null) {
                    // If LAST is a reset insert sourceToken in the list.
                    if (sourceToken.m_listHeader_.m_first_ == null) {
                        sourceToken.m_listHeader_.m_first_ = sourceToken;
                        sourceToken.m_listHeader_.m_last_ = sourceToken;
                    }
                    else { // we need to find a place for us
                           // and we'll get in front of the same strength
                        if (sourceToken.m_listHeader_.m_first_.m_strength_
                                                 <= sourceToken.m_strength_) {
                            sourceToken.m_next_
                                          = sourceToken.m_listHeader_.m_first_;
                            sourceToken.m_next_.m_previous_ = sourceToken;
                            sourceToken.m_listHeader_.m_first_ = sourceToken;
                            sourceToken.m_previous_ = null;
                        }
                        else {
                            lastToken = sourceToken.m_listHeader_.m_first_;
                            while (lastToken.m_next_ != null
                                   && lastToken.m_next_.m_strength_
                                                 > sourceToken.m_strength_) {
                                lastToken = lastToken.m_next_;
                            }
                            if (lastToken.m_next_ != null) {
                                lastToken.m_next_.m_previous_ = sourceToken;
                            }
                            else {
                                sourceToken.m_listHeader_.m_last_
                                                               = sourceToken;
                            }
                            sourceToken.m_previous_ = lastToken;
                            sourceToken.m_next_ = lastToken.m_next_;
                            lastToken.m_next_ = sourceToken;
                        }
                    }
                }
                else {
                    // Otherwise (when LAST is not a reset)
                    // if polarity (LAST) == polarity(relation), insert
                    // sourceToken after LAST, otherwise insert before.
                    // when inserting after or before, search to the next
                    // position with the same strength in that direction.
                    // (This is called postpone insertion).
                    if (sourceToken != lastToken) {
                        if (lastToken.m_polarity_ == sourceToken.m_polarity_) {
                            while (lastToken.m_next_ != null
                                   && lastToken.m_next_.m_strength_
                                                   > sourceToken.m_strength_) {
                                lastToken = lastToken.m_next_;
                            }
                            sourceToken.m_previous_ = lastToken;
                            if (lastToken.m_next_ != null) {
                                lastToken.m_next_.m_previous_ = sourceToken;
                            }
                            else {
                                sourceToken.m_listHeader_.m_last_ = sourceToken;
                            }
                            sourceToken.m_next_ = lastToken.m_next_;
                            lastToken.m_next_ = sourceToken;
                        }
                        else {
                            while (lastToken.m_previous_ != null
                                   && lastToken.m_previous_.m_strength_
                                                > sourceToken.m_strength_) {
                                lastToken = lastToken.m_previous_;
                            }
                            sourceToken.m_next_ = lastToken;
                            if (lastToken.m_previous_ != null) {
                                lastToken.m_previous_.m_next_ = sourceToken;
                            }
                            else {
                                sourceToken.m_listHeader_.m_first_
                                                                 = sourceToken;
                            }
                            sourceToken.m_previous_ = lastToken.m_previous_;
                            lastToken.m_previous_ = sourceToken;
                        }
                    }
                    else { // repeated one thing twice in rules, stay with the
                           // stronger strength
                        if (lastStrength < sourceToken.m_strength_) {
                            sourceToken.m_strength_ = lastStrength;
                        }
                    }
                }
                // if the token was a variable top, we're gonna put it in
                if (variableTop == true && m_variableTop_ == null) {
                    variableTop = false;
                    m_variableTop_ = sourceToken;
                }
                // Treat the expansions.
                // There are two types of expansions: explicit (x / y) and
                // reset based propagating expansions
                // (&abc * d * e <=> &ab * d / c * e / c)
                // if both of them are in effect for a token, they are combined.
               sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
                                          | m_parsedToken_.m_extensionOffset_;
               if (expandNext != 0) {
                   if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) {
                       // primary strength kills off the implicit expansion
                       expandNext = 0;
                   }
                   else if (sourceToken.m_expansion_ == 0) {
                       // if there is no expansion, implicit is just added to
                       // the token
                       sourceToken.m_expansion_ = expandNext;
                   }
                   else {
                       // there is both explicit and implicit expansion.
                       // We need to make a combination
                       int start = expandNext & 0xFFFFFF;
                       int size = expandNext >>> 24;
                       if (size > 0) {
                          m_source_.append(m_source_.substring(start,
                                                               start + size));
                       }
                          start = m_parsedToken_.m_extensionOffset_;
                       m_source_.append(m_source_.substring(start,
                                      start + m_parsedToken_.m_extensionLen_));
                       sourceToken.m_expansion_ = (size
                                       + m_parsedToken_.m_extensionLen_) << 24
                                       | m_extraCurrent_;
                       m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_;
                   }
                }
               // if the previous token was a reset before, the strength of this
               // token must match the strength of before. Otherwise we have an
               // undefined situation.
               // In other words, we currently have a cludge which we use to
               // represent &a >> x. This is written as &[before 2]a << x.
               if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) {
                   int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1;
                   if(beforeStrength != sourceToken.m_strength_) {
                          throwParseException(m_source_.toString(), m_current_);
                   }
               }

            }
            else {
                if (lastToken != null && lastStrength == TOKEN_RESET_) {
                    // if the previous token was also a reset, this means that
                    // we have two consecutive resets and we want to remove the
                    // previous one if empty
                    if (m_resultLength_ > 0 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
                        m_resultLength_ --;
                    }
                }
                if (sourceToken == null) {
                    // this is a reset, but it might still be somewhere in the
                    // tailoring, in shorter form
                    int searchCharsLen = m_parsedToken_.m_charsLen_;
                    while (searchCharsLen > 1 && sourceToken == null) {
                        searchCharsLen --;
                        // key = searchCharsLen << 24 | charsOffset;
                        m_utilToken_.m_source_ = searchCharsLen << 24
                                             | m_parsedToken_.m_charsOffset_;
                        m_utilToken_.m_rules_ = m_source_;
                        sourceToken = m_hashTable_.get(m_utilToken_);
                    }
                    if (sourceToken != null) {
                        expandNext = (m_parsedToken_.m_charsLen_
                                                      - searchCharsLen) << 24
                                        | (m_parsedToken_.m_charsOffset_
                                           + searchCharsLen);
                    }
                }
                if ((specs & TOKEN_BEFORE_) != 0) {
                    if (top == false) {
                        // we're doing before & there is no indirection
                        int strength = (specs & TOKEN_BEFORE_) - 1;
                        if (sourceToken != null
                            && sourceToken.m_strength_ != TOKEN_RESET_) {
                            // this is a before that is already ordered in the UCA
                            // - so we need to get the previous with good strength
                            while (sourceToken.m_strength_ > strength
                                   && sourceToken.m_previous_ != null) {
                                sourceToken = sourceToken.m_previous_;
                            }
                            // here, either we hit the strength or NULL
                            if (sourceToken.m_strength_ == strength) {
                                if (sourceToken.m_previous_ != null) {
                                    sourceToken = sourceToken.m_previous_;
                                }
                                else { // start of list
                                    sourceToken
                                         = sourceToken.m_listHeader_.m_reset_;
                                }
                            }
                            else { // we hit NULL, we should be doing the else part
                                sourceToken
                                         = sourceToken.m_listHeader_.m_reset_;
                                sourceToken = getVirginBefore(sourceToken,
                                                              strength);
                            }
                        }
                        else {
                            sourceToken
                                      = getVirginBefore(sourceToken, strength);
                        }
                    }
                    else {
                        // this is both before and indirection
                        top = false;
                        m_listHeader_[m_resultLength_] = new TokenListHeader();
                        m_listHeader_[m_resultLength_].m_previousCE_ = 0;
                        m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
                        m_listHeader_[m_resultLength_].m_indirect_ = true;
                        // we need to do slightly more work. we need to get the
                        // baseCE using the inverse UCA & getPrevious. The next
                        // bound is not set, and will be decided in ucol_bld
                        int strength = (specs & TOKEN_BEFORE_) - 1;
                        int baseCE = INDIRECT_BOUNDARIES_[
                                   m_parsedToken_.m_indirectIndex_].m_startCE_;
                        int baseContCE = INDIRECT_BOUNDARIES_[
                               m_parsedToken_.m_indirectIndex_].m_startContCE_;
                        int ce[] = new int[2];
                        if((baseCE >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
                        && (baseCE >>> 24 <=  RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */
                            int primary = baseCE & RuleBasedCollator.CE_PRIMARY_MASK_ | (baseContCE & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
                            int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
                            int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
                            ce[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
                            ce[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
                        } else {
                            CollationParsedRuleBuilder.InverseUCA invuca
                                = CollationParsedRuleBuilder.INVERSE_UCA_;
                            invuca.getInversePrevCE(baseCE, baseContCE, strength,
                                    ce);
                        }
                        m_listHeader_[m_resultLength_].m_baseCE_ = ce[0];
                        m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1];
                        m_listHeader_[m_resultLength_].m_nextCE_ = 0;
                        m_listHeader_[m_resultLength_].m_nextContCE_ = 0;

                        sourceToken = new Token();
                        expandNext = initAReset(0, sourceToken);
                    }
                }
                // 5 If the relation is a reset:
                // If sourceToken is null
                // Create new list, create new sourceToken, make the baseCE
                // from source, put the sourceToken in ListHeader of the new
                // list
                if (sourceToken == null) {
                    if (m_listHeader_[m_resultLength_] == null) {
                        m_listHeader_[m_resultLength_] = new TokenListHeader();
                    }
                    // 3 Consider each item: relation, source, and expansion:
                    // e.g. ...< x / y ...
                    // First convert all expansions into normal form.
                    // Examples:
                    // If "xy" doesn't occur earlier in the list or in the UCA,
                    // convert &xy * c * d * ... into &x * c/y * d * ...
                    // Note: reset values can never have expansions, although
                    // they can cause the very next item to have one. They may
                    // be contractions, if they are found earlier in the list.
                    if (top == false) {
                        CollationElementIterator coleiter
                        = RuleBasedCollator.UCA_.getCollationElementIterator(
                            m_source_.substring(m_parsedToken_.m_charsOffset_,
                                                m_parsedToken_.m_charsOffset_
                                                + m_parsedToken_.m_charsLen_));

                        int CE = coleiter.next();
                        // offset to the character in the full rule string
                        int expand = coleiter.getOffset()
                                     + m_parsedToken_.m_charsOffset_;
                        int SecondCE = coleiter.next();

                        m_listHeader_[m_resultLength_].m_baseCE_
                                                             = CE & 0xFFFFFF3F;
                        if (RuleBasedCollator.isContinuation(SecondCE)) {
                            m_listHeader_[m_resultLength_].m_baseContCE_
                                                                    = SecondCE;
                        }
                        else {
                            m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
                        }
                        m_listHeader_[m_resultLength_].m_nextCE_ = 0;
                        m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
                        m_listHeader_[m_resultLength_].m_previousCE_ = 0;
                        m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
                        m_listHeader_[m_resultLength_].m_indirect_ = false;
                        sourceToken = new Token();
                        expandNext = initAReset(expand, sourceToken);
                    }
                    else { // top == TRUE
                        top = false;
                        m_listHeader_[m_resultLength_].m_previousCE_ = 0;
                        m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
                        m_listHeader_[m_resultLength_].m_indirect_ = true;
                        IndirectBoundaries ib = INDIRECT_BOUNDARIES_[
                                              m_parsedToken_.m_indirectIndex_];
                        m_listHeader_[m_resultLength_].m_baseCE_
                                                               = ib.m_startCE_;
                        m_listHeader_[m_resultLength_].m_baseContCE_
                                                           = ib.m_startContCE_;
                        m_listHeader_[m_resultLength_].m_nextCE_
                                                               = ib.m_limitCE_;
                        m_listHeader_[m_resultLength_].m_nextContCE_
                                                           = ib.m_limitContCE_;
                        sourceToken = new Token();
                        expandNext = initAReset(0, sourceToken);
                    }
                }
                else { // reset to something already in rules
                    top = false;
                }
            }
            // 7 After all this, set LAST to point to sourceToken, and goto
            // step 3.
            lastToken = sourceToken;
        }

        if (m_resultLength_ > 0
            && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
            m_resultLength_ --;
        }
        return m_resultLength_;
    }

    /**
     * Formats and throws a ParseException
     * @param rules collation rule that failed
     * @param offset failed offset in rules
     * @throws ParseException with failure information
     */
    private static final void throwParseException(String rules, int offset)
                                                          throws ParseException
    {
        // for pre-context
        String precontext = rules.substring(0, offset);
        String postcontext = rules.substring(offset, rules.length());
        StringBuilder error = new StringBuilder(
                                    "Parse error occurred in rule at offset ");
        error.append(offset);
        error.append("\n after the prefix \"");
        error.append(precontext);
        error.append("\" before the suffix \"");
        error.append(postcontext);
        throw new ParseException(error.toString(), offset);
    }

    private final boolean doSetTop() {
        m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
        m_source_.append((char)0xFFFE);
        IndirectBoundaries ib =
                  INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_];
        m_source_.append((char)(ib.m_startCE_ >> 16));
        m_source_.append((char)(ib.m_startCE_ & 0xFFFF));
        m_extraCurrent_ += 3;
        if (INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_
                                                       ].m_startContCE_ == 0) {
            m_parsedToken_.m_charsLen_ = 3;
        }
        else {
            m_source_.append((char)(INDIRECT_BOUNDARIES_[
                                        m_parsedToken_.m_indirectIndex_
                                    ].m_startContCE_ >> 16));
            m_source_.append((char)(INDIRECT_BOUNDARIES_[
                                        m_parsedToken_.m_indirectIndex_
                                    ].m_startContCE_ & 0xFFFF));
            m_extraCurrent_ += 2;
            m_parsedToken_.m_charsLen_ = 5;
        }
        return true;
    }

    private static boolean isCharNewLine(char c) {
        switch (c) {
        case 0x000A: /* LF */
        case 0x000D: /* CR */
        case 0x000C: /* FF */
        case 0x0085: /* NEL */
        case 0x2028: /* LS */
        case 0x2029: /* PS */
            return true;
        default:
            return false;
        }
    }

    /**
     * Parses the next token.
     *
     * It updates/accesses the following member variables:
     * m_current_: Index to the next unparsed character (not code point)
     *    in the character array (a StringBuilder object) m_source_.
     * m_parsedToken_:  The parsed token.  The following of the token are updated.
     *    .m_strength: The strength of the token.
     *    .m_charsOffset, m_charsLen_: Index to the first character (after operators),
     *         and number of characters in the token.
     *         This may be in the main string, or in the appended string.
     *    .m_extensionOffset_,  .m_extensionLen_:
     *    .m_flags:
     *    .m_prefixOffset, .m_prefixLen:  Used when "|" is used to specify "context before".
     *    .m_indirectIndex:
     * @param startofrules
     *            flag indicating if we are at the start of rules
     * @return the offset of the next unparsed char
     * @exception ParseException
     *                thrown when rule parsing fails
     */
    private int parseNextToken(boolean startofrules) throws ParseException
    {

        if (m_inRange_) {
            // We are not done processing a range.  Continue it.
            return processNextCodePointInRange();
        } else if (m_isStarred_) {
            // We are not done processing a starred token.  Continue it.
            return processNextTokenInTheStarredList();
        }

        // Get the next token.
        int nextOffset = parseNextTokenInternal(startofrules);

        // If the next token is starred and/or in range, we need to handle it here.
        if (m_inRange_) {
            // A new range has started.
            // Check whether it is a chain of ranges with more than one hyphen.
            if (m_lastRangeCp_ > 0 && m_lastRangeCp_ == m_previousCp_) {
                throw new ParseException("Chained range syntax", m_current_);
            }

            // The current token is the first character of the second code point of the range.
            // Process just that, and then proceed with the star.
            m_lastRangeCp_ = m_source_.codePointAt(this.m_parsedToken_.m_charsOffset_);
            if (m_lastRangeCp_ <= m_previousCp_) {
                throw new ParseException("Invalid range", m_current_);
            }

            // Set current range code point to process the range loop
            m_currentRangeCp_ = m_previousCp_ + 1;

            // Set current starred char index to continue processing the starred
            // expression after the range is done.
            m_currentStarredCharIndex_ = m_parsedToken_.m_charsOffset_
                + Character.charCount(m_lastRangeCp_);
            m_lastStarredCharIndex_ = m_parsedToken_.m_charsOffset_ + m_parsedToken_.m_charsLen_ - 1;

            return processNextCodePointInRange();
        } else if (m_isStarred_) {
            // We define two indices m_currentStarredCharIndex_ and m_lastStarredCharIndex_ so that
            // [m_currentStarredCharIndex_ .. m_lastStarredCharIndex_], both inclusive, need to be
            // separated into several tokens and returned.
            m_currentStarredCharIndex_ = m_parsedToken_.m_charsOffset_;
            m_lastStarredCharIndex_ =  m_parsedToken_.m_charsOffset_ + m_parsedToken_.m_charsLen_ - 1;

            return processNextTokenInTheStarredList();
        }
        return nextOffset;
    }

    private int processNextCodePointInRange() throws ParseException {
        int nChars = Character.charCount(m_currentRangeCp_);
        m_source_.appendCodePoint(m_currentRangeCp_);

        m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
        m_parsedToken_.m_charsLen_ = nChars;

        m_extraCurrent_ += nChars;
        ++m_currentRangeCp_;
        if (m_currentRangeCp_ > m_lastRangeCp_) {
            // All the code points in the range are processed.
            // Turn the range flag off.
            m_inRange_ = false;

            // If there is a starred portion remaining in the current
            // parsed token, resume the starred operation.
            if (m_currentStarredCharIndex_ <= m_lastStarredCharIndex_) {
                m_isStarred_ = true;
            } else {
                m_isStarred_ = false;
            }
        } else {
            m_previousCp_ = m_currentRangeCp_;
        }
       return m_current_;
    }


    /**
     * Extracts the next token from the starred token from
     *   m_currentStarredCharIndex_ and returns it.
     * @return the offset of the next unparsed char
     * @throws ParseException
     */
    private int processNextTokenInTheStarredList() throws ParseException {
        // Extract the characters corresponding to the next code point.
        int cp = m_source_.codePointAt(m_currentStarredCharIndex_);
        int nChars = Character.charCount(cp);

        m_parsedToken_.m_charsLen_ = nChars;
        m_parsedToken_.m_charsOffset_ = m_currentStarredCharIndex_;
        m_currentStarredCharIndex_ += nChars;

        // When we are done parsing the starred string, turn the flag off so that
        // the normal processing is restored.
        if (m_currentStarredCharIndex_ > m_lastStarredCharIndex_) {
            m_isStarred_ = false;
        }
        m_previousCp_ = cp;
        return m_current_;
    }

    private int resetToTop(boolean top, boolean variableTop,
                           int extensionOffset, int newExtensionLen,
                           byte byteBefore) throws ParseException {
        m_parsedToken_.m_indirectIndex_ = 5;
        top = doSetTop();
        return doEndParseNextToken(TOKEN_RESET_,
                                   top,
                                   extensionOffset,
                                   newExtensionLen,
                                   variableTop, byteBefore);
    }

    /**
     * Gets the next token and sets the necessary internal variables.
     * This function parses a starred string as a single token, which will be separated
     * in the calling function.
     * @param startofrules Boolean value indicating whether this is the first rule
     * @return the offset of the next unparsed char
     * @throws ParseException
     */
    @SuppressWarnings("fallthrough")
    private int parseNextTokenInternal(boolean startofrules) throws ParseException {
        boolean variabletop = false;
        boolean top = false;
        boolean inchars = true;
        boolean inquote = false;
        boolean wasinquote = false;
        byte before = 0;
        boolean isescaped = false;
        int /*newcharslen = 0,*/ newextensionlen = 0;
        int /*charsoffset = 0,*/ extensionoffset = 0;
        int newstrength = TOKEN_UNSET_;

        initializeParsedToken();

        int limit = m_rules_.length();
        while (m_current_ < limit) {
            char ch = m_source_.charAt(m_current_);
            if (inquote) {
                if (ch == 0x0027) { // '\''
                    inquote = false;
                }
                else {
                    if ((m_parsedToken_.m_charsLen_ == 0) || inchars) {
                        if (m_parsedToken_.m_charsLen_ == 0) {
                            m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
                        }
                        m_parsedToken_.m_charsLen_ ++;
                    }
                    else {
                        if (newextensionlen == 0) {
                            extensionoffset = m_extraCurrent_;
                        }
                        newextensionlen ++;
                    }
                }
            }
            else if (isescaped) {
                isescaped = false;
                if (newstrength == TOKEN_UNSET_) {
                    throwParseException(m_rules_, m_current_);
                }
                if (ch != 0 && m_current_ != limit) {
                    if (inchars) {
                        if (m_parsedToken_.m_charsLen_ == 0) {
                            m_parsedToken_.m_charsOffset_ = m_current_;
                        }
                        m_parsedToken_.m_charsLen_ ++;
                    }
                    else {
                        if (newextensionlen == 0) {
                            extensionoffset = m_current_;
                        }
                        newextensionlen ++;
                    }
                }
            }
            else {
                if (!PatternProps.isWhiteSpace(ch)) {
                    // Sets the strength for this entry
                    switch (ch) {
                    case 0x003D : // '='
                        if (newstrength != TOKEN_UNSET_) {
                            return doEndParseNextToken(newstrength,
                                                       top,
                                                       extensionoffset,
                                                       newextensionlen,
                                                       variabletop, before);
                        }
                        // if we start with strength, we'll reset to top
                        if (startofrules == true) {
                            return resetToTop(top, variabletop, extensionoffset,
                                              newextensionlen, before);
                        }
                        newstrength = Collator.IDENTICAL;
                        if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
                            m_current_++;
                            m_isStarred_ = true;
                        }
                        break;
                    case 0x002C : // ','
                        if (newstrength != TOKEN_UNSET_) {
                            return doEndParseNextToken(newstrength,
                                                       top,
                                                       extensionoffset,
                                                       newextensionlen,
                                                       variabletop, before);
                        }
                        // if we start with strength, we'll reset to top
                        if (startofrules == true) {
                            return resetToTop(top, variabletop, extensionoffset,
                                              newextensionlen, before);
                        }
                        newstrength = Collator.TERTIARY;
                        break;
                    case 0x003B : // ';'
                        if (newstrength != TOKEN_UNSET_) {
                            return doEndParseNextToken(newstrength,
                                                       top,
                                                       extensionoffset,
                                                       newextensionlen,
                                                       variabletop, before);
                        }
                        //if we start with strength, we'll reset to top
                        if(startofrules == true) {
                            return resetToTop(top, variabletop, extensionoffset,
                                              newextensionlen, before);
                        }
                        newstrength = Collator.SECONDARY;
                        break;
                    case 0x003C : // '<'
                        if (newstrength != TOKEN_UNSET_) {
                            return doEndParseNextToken(newstrength,
                                                       top,
                                                       extensionoffset,
                                                       newextensionlen,
                                                       variabletop, before);
                        }
                       // if we start with strength, we'll reset to top
                       if (startofrules == true) {
                            return resetToTop(top, variabletop, extensionoffset,
                                              newextensionlen, before);
                        }
                        // before this, do a scan to verify whether this is
                        // another strength
                        if (m_source_.charAt(m_current_ + 1) == 0x003C) {
                            m_current_ ++;
                            if (m_source_.charAt(m_current_ + 1) == 0x003C) {
                                m_current_ ++; // three in a row!
                                newstrength = Collator.TERTIARY;
                            }
                            else { // two in a row
                                newstrength = Collator.SECONDARY;
                            }
                        }
                        else { // just one
                            newstrength = Collator.PRIMARY;
                        }
                        if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
                            m_current_++;
                            m_isStarred_ = true;
                        }
                        break;

                    case 0x0026 : // '&'
                        if (newstrength != TOKEN_UNSET_) {
                            return doEndParseNextToken(newstrength,
                                                       top,
                                                       extensionoffset,
                                                       newextensionlen,
                                                       variabletop, before);
                        }
                        newstrength = TOKEN_RESET_; // PatternEntry::RESET = 0
                        break;
                    case 0x005b : // '['
                        // options - read an option, analyze it
                        m_optionEnd_ = m_rules_.indexOf(0x005d, m_current_);
                        if (m_optionEnd_ != -1) { // ']'
                            byte result = readAndSetOption();
                            m_current_ = m_optionEnd_;
                            if ((result & TOKEN_TOP_MASK_) != 0) {
                                if (newstrength == TOKEN_RESET_) {
                                    doSetTop();
                                    if (before != 0) {
                                        // This is a combination of before and
                                        // indirection like
                                        // '&[before 2][first regular]>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
                && (basece >>> 24 <=  RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */

            int primary = basece & RuleBasedCollator.CE_PRIMARY_MASK_ | (basecontce & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
            int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
            ch = RuleBasedCollator.impCEGen_.getCodePointFromRaw(raw-1);
            int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
            m_utilCEBuffer_[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
            m_utilCEBuffer_[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;

            m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
            m_source_.append('\uFFFE');
            m_source_.append((char)ch);
            m_extraCurrent_ += 2;
            m_parsedToken_.m_charsLen_++;

            m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
            | m_parsedToken_.m_charsOffset_;
            m_utilToken_.m_rules_ = m_source_;
            sourcetoken = m_hashTable_.get(m_utilToken_);

            if(sourcetoken == null) {
                m_listHeader_[m_resultLength_] = new TokenListHeader();
                m_listHeader_[m_resultLength_].m_baseCE_
                    = m_utilCEBuffer_[0] & 0xFFFFFF3F;
                if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
                    m_listHeader_[m_resultLength_].m_baseContCE_
                    = m_utilCEBuffer_[1];
                }
                else {
                    m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
                }
                m_listHeader_[m_resultLength_].m_nextCE_ = 0;
                m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
                m_listHeader_[m_resultLength_].m_previousCE_ = 0;
                m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
                m_listHeader_[m_resultLength_].m_indirect_ = false;

                sourcetoken = new Token();
                initAReset(-1, sourcetoken);
            }

        } else {

            // first ce and second ce m_utilCEBuffer_
            /*int invpos = */CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE(
                                                         basece, basecontce,
                                                         strength, m_utilCEBuffer_);
            // we got the previous CE. Now we need to see if the difference between
            // the two CEs is really of the requested strength.
            // if it's a bigger difference (we asked for secondary and got primary), we
            // need to modify the CE.
            if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) {
                // adjust the strength
                // now we are in the situation where our baseCE should actually be modified in
                // order to get the CE in the right position.
                if(strength == Collator.SECONDARY) {
                    m_utilCEBuffer_[0] = basece - 0x0200;
                } else { // strength == UCOL_TERTIARY
                    m_utilCEBuffer_[0] = basece - 0x02;
                }
                if(RuleBasedCollator.isContinuation(basecontce)) {
                    if(strength == Collator.SECONDARY) {
                        m_utilCEBuffer_[1] = basecontce - 0x0200;
                    } else { // strength == UCOL_TERTIARY
                        m_utilCEBuffer_[1] = basecontce - 0x02;
                    }
                }
            }

/*
            // the code below relies on getting a code point from the inverse table, in order to be
            // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
            // 1. There are many code points that have the same CE
            // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2] are broken.
            // Also, in case when there is no equivalent strength before an element, we have to actually
            // construct one. For example, &[before 2]a << x won't result in x << a, because the element
            // before a is a primary difference.
            ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos
                                                                      + 2];
            if ((ch &  INVERSE_SIZE_MASK_) != 0) {
                int offset = ch & INVERSE_OFFSET_MASK_;
                ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_continuations_[
                                                                           offset];
            }
            m_source_.append((char)ch);
            m_extraCurrent_ ++;
            m_parsedToken_.m_charsOffset_ = m_extraCurrent_ - 1;
            m_parsedToken_.m_charsLen_ = 1;

            // We got an UCA before. However, this might have been tailored.
            // example:
            // &\u30ca = \u306a
            // &[before 3]\u306a<<<\u306a|\u309d

            m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
                                                 | m_parsedToken_.m_charsOffset_;
            m_utilToken_.m_rules_ = m_source_;
            sourcetoken = (Token)m_hashTable_.get(m_utilToken_);
*/

            // here is how it should be. The situation such as &[before 1]a < x, should be
            // resolved exactly as if we wrote &a > x.
            // therefore, I don't really care if the UCA value before a has been changed.
            // However, I do care if the strength between my element and the previous element
            // is bigger then I wanted. So, if CE < baseCE and I wanted &[before 2], then i'll
            // have to construct the base CE.

            // if we found a tailored thing, we have to use the UCA value and
            // construct a new reset token with constructed name
            //if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) {
                // character to which we want to anchor is already tailored.
                // We need to construct a new token which will be the anchor point
                //m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE');
                //m_source_.append(ch);
                //m_extraCurrent_ ++;
                //m_parsedToken_.m_charsLen_ ++;
                // grab before
                m_parsedToken_.m_charsOffset_ -= 10;
                m_parsedToken_.m_charsLen_ += 10;
                m_listHeader_[m_resultLength_] = new TokenListHeader();
                m_listHeader_[m_resultLength_].m_baseCE_
                                                 = m_utilCEBuffer_[0] & 0xFFFFFF3F;
                if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
                    m_listHeader_[m_resultLength_].m_baseContCE_
                                                              = m_utilCEBuffer_[1];
                }
                else {
                    m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
                }
                m_listHeader_[m_resultLength_].m_nextCE_ = 0;
                m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
                m_listHeader_[m_resultLength_].m_previousCE_ = 0;
                m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
                m_listHeader_[m_resultLength_].m_indirect_ = false;
                sourcetoken = new Token();
                initAReset(-1, sourcetoken);
            //}
        }
        return sourcetoken;
    }

    /**
     * Processing Description.
     * 1. Build a m_listHeader_. Each list has a header, which contains two lists
     * (positive and negative), a reset token, a baseCE, nextCE, and
     * previousCE. The lists and reset may be null.
     * 2. As you process, you keep a LAST pointer that points to the last token
     * you handled.
     * @param expand string offset, -1 for null strings
     * @param targetToken token to update
     * @return expandnext offset
     * @throws ParseException thrown when rules syntax failed
     */
    private int initAReset(int expand, Token targetToken) throws ParseException
    {
        if (m_resultLength_ == m_listHeader_.length - 1) {
            // Unfortunately, this won't work, as we store addresses of lhs in
            // token
            TokenListHeader temp[] = new TokenListHeader[m_resultLength_ << 1];
            System.arraycopy(m_listHeader_, 0, temp, 0, m_resultLength_ + 1);
            m_listHeader_ = temp;
        }
        // do the reset thing
        targetToken.m_rules_ = m_source_;
        targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
                                | m_parsedToken_.m_charsOffset_;
        targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
                                   | m_parsedToken_.m_extensionOffset_;
        // keep the flags around so that we know about before
        targetToken.m_flags_ = m_parsedToken_.m_flags_;

        if (m_parsedToken_.m_prefixOffset_ != 0) {
            throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1);
        }

        targetToken.m_prefix_ = 0;
        // TODO: this should also handle reverse
        targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
        targetToken.m_strength_ = TOKEN_RESET_;
        targetToken.m_next_ = null;
        targetToken.m_previous_ = null;
        targetToken.m_CELength_ = 0;
        targetToken.m_expCELength_ = 0;
        targetToken.m_listHeader_ = m_listHeader_[m_resultLength_];
        m_listHeader_[m_resultLength_].m_first_ = null;
        m_listHeader_[m_resultLength_].m_last_ = null;
        m_listHeader_[m_resultLength_].m_first_ = null;
        m_listHeader_[m_resultLength_].m_last_ = null;
        m_listHeader_[m_resultLength_].m_reset_ = targetToken;

        /* 3 Consider each item: relation, source, and expansion:
         * e.g. ...< x / y ...
         * First convert all expansions into normal form. Examples:
         * If "xy" doesn't occur earlier in the list or in the UCA, convert
         * &xy * c * d * ... into &x * c/y * d * ...
         * Note: reset values can never have expansions, although they can
         * cause the very next item to have one. They may be contractions, if
         * they are found earlier in the list.
         */
        int result = 0;
        if (expand > 0) {
            // check to see if there is an expansion
            if (m_parsedToken_.m_charsLen_ > 1) {
                targetToken.m_source_ = ((expand
                                          - m_parsedToken_.m_charsOffset_ )
                                          << 24)
                                          | m_parsedToken_.m_charsOffset_;
                result = ((m_parsedToken_.m_charsLen_
                               + m_parsedToken_.m_charsOffset_ - expand) << 24)
                               | expand;
            }
        }

        m_resultLength_ ++;
        m_hashTable_.put(targetToken, targetToken);
        return result;
    }

    /**
     * Checks if an character is special
     * @param ch character to test
     * @return true if the character is special
     */
    private static final boolean isSpecialChar(char ch)
    {
        return (ch <= 0x002F && ch >= 0x0020) || (ch <= 0x003F && ch >= 0x003A)
               || (ch <= 0x0060 && ch >= 0x005B)
               || (ch <= 0x007E && ch >= 0x007D) || ch == 0x007B;
    }

    private
    UnicodeSet readAndSetUnicodeSet(String source, int start) throws ParseException
    {
      while(source.charAt(start) != '[') { /* advance while we find the first '[' */
        start++;
      }
      // now we need to get a balanced set of '[]'. The problem is that a set can have
      // many, and *end point to the first closing '['
      int noOpenBraces = 1;
      int current = 1; // skip the opening brace
      while(start+current < source.length() && noOpenBraces != 0) {
        if(source.charAt(start+current) == '[') {
          noOpenBraces++;
        } else if(source.charAt(start+current) == ']') { // closing brace
          noOpenBraces--;
        }
        current++;
      }
      //int nextBrace = -1;

      if(noOpenBraces != 0 || (/*nextBrace =*/ source.indexOf("]", start+current) /*']'*/) == -1) {
        throwParseException(m_rules_, start);
      }
      return new UnicodeSet(source.substring(start, start+current)); //uset_openPattern(start, current);
    }

    /** in C, optionarg is passed by reference to function.
     *  We use a private int to simulate this.
     */
    private int m_optionarg_ = 0;

    private int readOption(String rules, int start, int optionend)
    {
        m_optionarg_ = 0;
        int i = 0;
        while (i < RULES_OPTIONS_.length) {
            String option = RULES_OPTIONS_[i].m_name_;
            int optionlength = option.length();
            if (rules.length() > start + optionlength
                && option.equalsIgnoreCase(rules.substring(start,
                                                      start + optionlength))) {
                if (optionend - start > optionlength) {
                    m_optionarg_ = start + optionlength;
                    // start of the options, skip space
                    while (m_optionarg_ < optionend && PatternProps.isWhiteSpace(rules.charAt(m_optionarg_)))
                    {   // eat whitespace
                        m_optionarg_ ++;
                    }
                }
                break;
            }
            i ++;
        }
        if(i == RULES_OPTIONS_.length) {
            i = -1;
        }
        return i;
    }
    
    /**
     * Reads and set collation options
     * @return TOKEN_SUCCESS if option is set correct, 0 otherwise
     * @exception ParseException thrown when options in rules are wrong
     */
    private byte readAndSetOption() throws ParseException
    {
        int start = m_current_ + 1; // skip opening '['
        int i = readOption(m_rules_, start, m_optionEnd_);

        int optionarg = m_optionarg_;

        if (i < 0) {
            throwParseException(m_rules_, start);
        }

        if (i < 7) {
            if (optionarg != 0) {
                for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
                                                                        j ++) {
                     String subname = RULES_OPTIONS_[i].m_subOptions_[j];
                     int size = optionarg + subname.length();
                     if (m_rules_.length() > size
                         && subname.equalsIgnoreCase(m_rules_.substring(
                                                           optionarg, size))) {
                         setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_,
                             RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]);
                         return TOKEN_SUCCESS_MASK_;
                     }
                }
            }
            throwParseException(m_rules_, optionarg);
        }
        else if (i == 7) { // variable top
            return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_;
        }
        else if (i == 8) { // rearrange
            return TOKEN_SUCCESS_MASK_;
        }
        else if (i == 9) { // before
            if (optionarg != 0) {
                for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
                                                                        j ++) {
                     String subname = RULES_OPTIONS_[i].m_subOptions_[j];
                     int size = optionarg + subname.length();
                     if (m_rules_.length() > size
                         && subname.equalsIgnoreCase(
                                               m_rules_.substring(optionarg,
                                              optionarg + subname.length()))) {
                         return (byte)(TOKEN_SUCCESS_MASK_
                            | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]
                            + 1);
                     }
                }
            }
            throwParseException(m_rules_, optionarg);
        }
        else if (i == 10) {  // top, we are going to have an array with
            // structures of limit CEs index to this array will be
            // src->parsedToken.indirectIndex
            m_parsedToken_.m_indirectIndex_ = 0;
            return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
        }
        else if (i < 13) { // first, last
            for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) {
                String subname = RULES_OPTIONS_[i].m_subOptions_[j];
                int size = optionarg + subname.length();
                if (m_rules_.length() > size
                    && subname.equalsIgnoreCase(m_rules_.substring(optionarg,
                                                                   size))) {
                    m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + (j << 1));
                    return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
                }
            }
            throwParseException(m_rules_, optionarg);
        }
        else if(i == 13 || i == 14) { // copy and remove are handled before normalization
            // we need to move end here
            int noOpenBraces = 1;
            m_current_++; // skip opening brace
            while(m_current_ < m_source_.length() && noOpenBraces != 0) {
                if(m_source_.charAt(m_current_) == '[') {
                  noOpenBraces++;
                } else if(m_source_.charAt(m_current_) == ']') { // closing brace
                  noOpenBraces--;
                }
                m_current_++;
            }
            m_optionEnd_ = m_current_-1;
            return TOKEN_SUCCESS_MASK_;
        }
        else if(i == 16) { 
            m_current_ = m_optionarg_; // skip opening brace and name 
            parseScriptReorder(); 
            return TOKEN_SUCCESS_MASK_; 
        } 
        else {
            throwParseException(m_rules_, optionarg);
        }
        return TOKEN_SUCCESS_MASK_; // we will never reach here.
    }

    /**
     * Set collation option
     * @param optionset option set to set
     * @param attribute type to set
     * @param value attribute value
     */
    private void setOptions(OptionSet optionset, int attribute, int value)
    {
        switch (attribute) {
            case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ :
                optionset.m_isHiragana4_
                            = (value == RuleBasedCollator.AttributeValue.ON_);
                break;
            case RuleBasedCollator.Attribute.FRENCH_COLLATION_ :
                optionset.m_isFrenchCollation_
                             = (value == RuleBasedCollator.AttributeValue.ON_);
                break;
            case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ :
                optionset.m_isAlternateHandlingShifted_
                             = (value
                                == RuleBasedCollator.AttributeValue.SHIFTED_);
                break;
            case RuleBasedCollator.Attribute.CASE_FIRST_ :
                optionset.m_caseFirst_ = value;
                break;
            case RuleBasedCollator.Attribute.CASE_LEVEL_ :
                optionset.m_isCaseLevel_
                             = (value == RuleBasedCollator.AttributeValue.ON_);
                break;
            case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ :
                if (value == RuleBasedCollator.AttributeValue.ON_) {
                    value = Collator.CANONICAL_DECOMPOSITION;
                }
                optionset.m_decomposition_ = value;
                break;
            case RuleBasedCollator.Attribute.STRENGTH_ :
                optionset.m_strength_ = value;
                break;
            default :
                break;
        }
      }

    UnicodeSet getTailoredSet() throws ParseException
    {
        boolean startOfRules = true;
        UnicodeSet tailored = new UnicodeSet();
        String pattern;
        CanonicalIterator it = new CanonicalIterator("");

        m_parsedToken_.m_strength_ = TOKEN_UNSET_;
        int sourcelimit = m_source_.length();
        //int expandNext = 0;

        while (m_current_ < sourcelimit) {
        m_parsedToken_.m_prefixOffset_ = 0;
        if (parseNextToken(startOfRules) < 0) {
            // we have reached the end
            continue;
        }
        startOfRules = false;
        // The idea is to tokenize the rule set. For each non-reset token,
        // we add all the canonicaly equivalent FCD sequences
            if(m_parsedToken_.m_strength_ != TOKEN_RESET_) {
                it.setSource(m_source_.substring(
                      m_parsedToken_.m_charsOffset_,
                      m_parsedToken_.m_charsOffset_+m_parsedToken_.m_charsLen_));
                pattern = it.next();
                while(pattern != null) {
                      if(Normalizer.quickCheck(pattern, Normalizer.FCD,0) != Normalizer.NO) {
                        tailored.add(pattern);
                    }
                    pattern = it.next();
                }
            }
        }
        return tailored;
    }

    final private String preprocessRules(String rules) throws ParseException {
      int optionNumber = -1;
      int setStart = 0;
      int i = 0;
      while(i < rules.length()) {
        if(rules.charAt(i) == 0x005B) { // [
          optionNumber = readOption(rules, i+1, rules.length());
          setStart = m_optionarg_;
          if(optionNumber == 13) { /* copy - parts of UCA to tailoring */
            UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
              if(m_copySet_ == null) {
                m_copySet_ = newSet;
              } else {
                m_copySet_.addAll(newSet);
              }
          } else if(optionNumber == 14) {
            UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
            if(m_removeSet_ == null) {
              m_removeSet_ = newSet;
            } else {
              m_removeSet_.addAll(newSet);
            }
          } else if(optionNumber == 19) {
            int optionEndOffset = rules.indexOf(']', i) + 1;
            ULocale locale = ULocale.forLanguageTag(rules.substring(setStart, optionEndOffset-1));
            UResourceBundle bundle = UResourceBundle.getBundleInstance(
                ICUResourceBundle.ICU_BASE_NAME + "/coll", locale.getBaseName());

            String type = locale.getKeywordValue("collation");
            if(type == null){
              type = "standard";
            }

            String importRules = bundle.get("collations")
                                 .get(type)
                                 .get("Sequence")
                                 .getString();

            rules = rules.substring(0, i) + importRules + rules.substring(optionEndOffset);
          }
        }
        i++;
      }
      return rules;
    }
    
    /* This is the data that is used for non-script reordering codes. These _must_ be kept
     * in order that they are to be applied as defaults and in synch with the Collator.ReorderCodes statics.
     */
    static final String ReorderingTokensArray[] = {
        "SPACE",
        "PUNCT",
        "SYMBOL",
        "CURRENCY",
        "DIGIT",
    };

    int findReorderingEntry(String name) {
        for (int tokenIndex = 0; tokenIndex < ReorderingTokensArray.length; tokenIndex++) {
            if (name.equalsIgnoreCase(ReorderingTokensArray[tokenIndex])) {
                return tokenIndex + ReorderCodes.FIRST;
            }
        }
        return UScript.INVALID_CODE;
    }
    
    private void parseScriptReorder() throws ParseException { 
        ArrayList tempOrder = new ArrayList(); 
        int end = m_rules_.indexOf(']', m_current_);
        if (end == -1) {
            return;
        }
        String tokenString = m_rules_.substring(m_current_, end);
        String[] tokens = tokenString.split("\\s+", 0);
        String token;
        for (int tokenIndex = 0; tokenIndex < tokens.length; tokenIndex++) {
            token = tokens[tokenIndex];
            int reorderCode = findReorderingEntry(token);
            if (reorderCode == UScript.INVALID_CODE) {
                reorderCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, token); 
                if (reorderCode < 0) {
                    throw new ParseException(m_rules_, tokenIndex);
                }
            }
            tempOrder.add(reorderCode);
        }
        m_options_.m_scriptOrder_ = new int[tempOrder.size()]; 
        for(int i = 0; i < tempOrder.size(); i++) { 
            m_options_.m_scriptOrder_[i] = tempOrder.get(i); 
        } 
    } 
}