All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.coll.CollationRootElements Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* CollationRootElements.java, ported from collationrootelements.h/.cpp
*
* C++ version created on: 2013mar01
* created by: Markus W. Scherer
*/

package com.ibm.icu.impl.coll;

/**
 * Container and access methods for collation elements and weights
 * that occur in the root collator.
 * Needed for finding boundaries for building a tailoring.
 *
 * This class takes and returns 16-bit secondary and tertiary weights.
 */
public final class CollationRootElements {
    public CollationRootElements(long[] rootElements) {
        elements = rootElements;
    }

    /**
     * Higher than any root primary.
     */
    public static final long PRIMARY_SENTINEL = 0xffffff00L;

    /**
     * Flag in a root element, set if the element contains secondary & tertiary weights,
     * rather than a primary.
     */
    public static final int SEC_TER_DELTA_FLAG = 0x80;
    /**
     * Mask for getting the primary range step value from a primary-range-end element.
     */
    public static final int PRIMARY_STEP_MASK = 0x7f;

    /**
     * Index of the first CE with a non-zero tertiary weight.
     * Same as the start of the compact root elements table.
     */
    public static final int IX_FIRST_TERTIARY_INDEX = 0;
    /**
     * Index of the first CE with a non-zero secondary weight.
     */
    static final int IX_FIRST_SECONDARY_INDEX = 1;
    /**
     * Index of the first CE with a non-zero primary weight.
     */
    static final int IX_FIRST_PRIMARY_INDEX = 2;
    /**
     * Must match Collation.COMMON_SEC_AND_TER_CE.
     */
    static final int IX_COMMON_SEC_AND_TER_CE = 3;
    /**
     * Secondary & tertiary boundaries.
     * Bits 31..24: [fixed last secondary common byte 45]
     * Bits 23..16: [fixed first ignorable secondary byte 80]
     * Bits 15.. 8: reserved, 0
     * Bits  7.. 0: [fixed first ignorable tertiary byte 3C]
     */
    static final int IX_SEC_TER_BOUNDARIES = 4;
    /**
     * The current number of indexes.
     * Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
     */
    static final int IX_COUNT = 5;

    /**
     * Returns the boundary between tertiary weights of primary/secondary CEs
     * and those of tertiary CEs.
     * This is the upper limit for tertiaries of primary/secondary CEs.
     * This minus one is the lower limit for tertiaries of tertiary CEs.
     */
    public int getTertiaryBoundary() {
        return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
    }

    /**
     * Returns the first assigned tertiary CE.
     */
    long getFirstTertiaryCE() {
        return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
    }

    /**
     * Returns the last assigned tertiary CE.
     */
    long getLastTertiaryCE() {
        return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
    }

    /**
     * Returns the last common secondary weight.
     * This is the lower limit for secondaries of primary CEs.
     */
    public int getLastCommonSecondary() {
        return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
    }

    /**
     * Returns the boundary between secondary weights of primary CEs
     * and those of secondary CEs.
     * This is the upper limit for secondaries of primary CEs.
     * This minus one is the lower limit for secondaries of secondary CEs.
     */
    public int getSecondaryBoundary() {
        return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
    }

    /**
     * Returns the first assigned secondary CE.
     */
    long getFirstSecondaryCE() {
        return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
    }

    /**
     * Returns the last assigned secondary CE.
     */
    long getLastSecondaryCE() {
        return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
    }

    /**
     * Returns the first assigned primary weight.
     */
    long getFirstPrimary() {
        return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]];  // step=0: cannot be a range end
    }

    /**
     * Returns the first assigned primary CE.
     */
    long getFirstPrimaryCE() {
        return Collation.makeCE(getFirstPrimary());
    }

    /**
     * Returns the last root CE with a primary weight before p.
     * Intended only for reordering group boundaries.
     */
    long lastCEWithPrimaryBefore(long p) {
        if(p == 0) { return 0; }
        assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
        int index = findP(p);
        long q = elements[index];
        long secTer;
        if(p == (q & 0xffffff00L)) {
            // p == elements[index] is a root primary. Find the CE before it.
            // We must not be in a primary range.
            assert((q & PRIMARY_STEP_MASK) == 0);
            secTer = elements[index - 1];
            if((secTer & SEC_TER_DELTA_FLAG) == 0) {
                // Primary CE just before p.
                p = secTer & 0xffffff00L;
                secTer = Collation.COMMON_SEC_AND_TER_CE;
            } else {
                // secTer = last secondary & tertiary for the previous primary
                index -= 2;
                for(;;) {
                    p = elements[index];
                    if((p & SEC_TER_DELTA_FLAG) == 0) {
                        p &= 0xffffff00L;
                        break;
                    }
                    --index;
                }
            }
        } else {
            // p > elements[index] which is the previous primary.
            // Find the last secondary & tertiary weights for it.
            p = q & 0xffffff00L;
            secTer = Collation.COMMON_SEC_AND_TER_CE;
            for(;;) {
                q = elements[++index];
                if((q & SEC_TER_DELTA_FLAG) == 0) {
                    // We must not be in a primary range.
                    assert((q & PRIMARY_STEP_MASK) == 0);
                    break;
                }
                secTer = q;
            }
        }
        return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
    }

    /**
     * Returns the first root CE with a primary weight of at least p.
     * Intended only for reordering group boundaries.
     */
    long firstCEWithPrimaryAtLeast(long p) {
        if(p == 0) { return 0; }
        int index = findP(p);
        if(p != (elements[index] & 0xffffff00L)) {
            for(;;) {
                p = elements[++index];
                if((p & SEC_TER_DELTA_FLAG) == 0) {
                    // First primary after p. We must not be in a primary range.
                    assert((p & PRIMARY_STEP_MASK) == 0);
                    break;
                }
            }
        }
        // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
        return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
    }

    /**
     * Returns the primary weight before p.
     * p must be greater than the first root primary.
     */
    long getPrimaryBefore(long p, boolean isCompressible) {
        int index = findPrimary(p);
        int step;
        long q = elements[index];
        if(p == (q & 0xffffff00L)) {
            // Found p itself. Return the previous primary.
            // See if p is at the end of a previous range.
            step = (int)q & PRIMARY_STEP_MASK;
            if(step == 0) {
                // p is not at the end of a range. Look for the previous primary.
                do {
                    p = elements[--index];
                } while((p & SEC_TER_DELTA_FLAG) != 0);
                return p & 0xffffff00L;
            }
        } else {
            // p is in a range, and not at the start.
            long nextElement = elements[index + 1];
            assert(isEndOfPrimaryRange(nextElement));
            step = (int)nextElement & PRIMARY_STEP_MASK;
        }
        // Return the previous range primary.
        if((p & 0xffff) == 0) {
            return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
        } else {
            return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
        }
    }

    /** Returns the secondary weight before [p, s]. */
    int getSecondaryBefore(long p, int s) {
        int index;
        int previousSec, sec;
        if(p == 0) {
            index = (int)elements[IX_FIRST_SECONDARY_INDEX];
            // Gap at the beginning of the secondary CE range.
            previousSec = 0;
            sec = (int)(elements[index] >> 16);
        } else {
            index = findPrimary(p) + 1;
            previousSec = Collation.MERGE_SEPARATOR_WEIGHT16;
            sec = Collation.COMMON_WEIGHT16;
        }
        assert(s >= sec);
        while(s > sec) {
            previousSec = sec;
            assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
            sec = (int)(elements[index++] >> 16);
        }
        assert(sec == s);
        return previousSec;
    }

    /** Returns the tertiary weight before [p, s, t]. */
    int getTertiaryBefore(long p, int s, int t) {
        assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
        int index;
        int previousTer;
        long secTer;
        if(p == 0) {
            if(s == 0) {
                index = (int)elements[IX_FIRST_TERTIARY_INDEX];
                // Gap at the beginning of the tertiary CE range.
                previousTer = 0;
            } else {
                index = (int)elements[IX_FIRST_SECONDARY_INDEX];
                previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
            }
            secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
        } else {
            index = findPrimary(p) + 1;
            previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
            secTer = Collation.COMMON_SEC_AND_TER_CE;
        }
        long st = ((long)s << 16) | t;
        while(st > secTer) {
            if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
            assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
            secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
        }
        assert(secTer == st);
        return previousTer & 0xffff;
    }

    /**
     * Finds the index of the input primary.
     * p must occur as a root primary, and must not be 0.
     */
    int findPrimary(long p) {
        // Requirement: p must occur as a root primary.
        assert((p & 0xff) == 0);  // at most a 3-byte primary
        int index = findP(p);
        // If p is in a range, then we just assume that p is an actual primary in this range.
        // (Too cumbersome/expensive to check.)
        // Otherwise, it must be an exact match.
        assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
        return index;
    }

    /**
     * Returns the primary weight after p where index=findPrimary(p).
     * p must be at least the first root primary.
     */
    long getPrimaryAfter(long p, int index, boolean isCompressible) {
        assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
        long q = elements[++index];
        int step;
        if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
            // Return the next primary in this range.
            if((p & 0xffff) == 0) {
                return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
            } else {
                return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
            }
        } else {
            // Return the next primary in the list.
            while((q & SEC_TER_DELTA_FLAG) != 0) {
                q = elements[++index];
            }
            assert((q & PRIMARY_STEP_MASK) == 0);
            return q;
        }
    }
    /**
     * Returns the secondary weight after [p, s] where index=findPrimary(p)
     * except use index=0 for p=0.
     */
    int getSecondaryAfter(int index, int s) {
        int secLimit;
        if(index == 0) {
            // primary = 0
            index = (int)elements[IX_FIRST_SECONDARY_INDEX];
            // Gap at the end of the secondary CE range.
            secLimit = 0x10000;
        } else {
            assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
            ++index;
            // Gap for secondaries of primary CEs.
            secLimit = getSecondaryBoundary();
        }
        for(;;) {
            long secTer = elements[index];
            if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
            int sec = (int)(secTer >> 16);
            if(sec > s) { return sec; }
            ++index;
        }
    }
    /**
     * Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
     * except use index=0 for p=0.
     */
    int getTertiaryAfter(int index, int s, int t) {
        int terLimit;
        if(index == 0) {
            // primary = 0
            if(s == 0) {
                index = (int)elements[IX_FIRST_TERTIARY_INDEX];
                // Gap at the end of the tertiary CE range.
                terLimit = 0x4000;
            } else {
                index = (int)elements[IX_FIRST_SECONDARY_INDEX];
                // Gap for tertiaries of primary/secondary CEs.
                terLimit = getTertiaryBoundary();
            }
        } else {
            assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
            ++index;
            terLimit = getTertiaryBoundary();
        }
        long st = (((long)s & 0xffffffffL) << 16) | t;
        for(;;) {
            long secTer = elements[index];
            // No tertiary greater than t for this primary+secondary.
            if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
            secTer &= ~SEC_TER_DELTA_FLAG;
            if(secTer > st) { return (int)secTer & 0xffff; }
            ++index;
        }
    }

    /**
     * Finds the largest index i where elements[i]<=p.
     * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
     * Does not require that p is a root collator primary.
     */
    private int findP(long p) {
        // p need not occur as a root primary.
        // For example, it might be a reordering group boundary.
        assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
        // modified binary search
        int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
        assert(p >= elements[start]);
        int limit = elements.length - 1;
        assert(elements[limit] >= PRIMARY_SENTINEL);
        assert(p < elements[limit]);
        while((start + 1) < limit) {
            // Invariant: elements[start] and elements[limit] are primaries,
            // and elements[start]<=p<=elements[limit].
            int i = (start + limit) / 2;
            long q = elements[i];
            if((q & SEC_TER_DELTA_FLAG) != 0) {
                // Find the next primary.
                int j = i + 1;
                for(;;) {
                    if(j == limit) { break; }
                    q = elements[j];
                    if((q & SEC_TER_DELTA_FLAG) == 0) {
                        i = j;
                        break;
                    }
                    ++j;
                }
                if((q & SEC_TER_DELTA_FLAG) != 0) {
                    // Find the preceding primary.
                    j = i - 1;
                    for(;;) {
                        if(j == start) { break; }
                        q = elements[j];
                        if((q & SEC_TER_DELTA_FLAG) == 0) {
                            i = j;
                            break;
                        }
                        --j;
                    }
                    if((q & SEC_TER_DELTA_FLAG) != 0) {
                        // No primary between start and limit.
                        break;
                    }
                }
            }
            if(p < (q & 0xffffff00L)) {  // Reset the "step" bits of a range end primary.
                limit = i;
            } else {
                start = i;
            }
        }
        return start;
    }

    private static boolean isEndOfPrimaryRange(long q) {
        return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
    }

    /**
     * Data structure: See ICU4C source/i18n/collationrootelements.h.
     */
    private long[] elements;
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy