com.ibm.icu.impl.coll.CollationRootElements Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* CollationRootElements.java, ported from collationrootelements.h/.cpp
*
* C++ version created on: 2013mar01
* created by: Markus W. Scherer
*/
package com.ibm.icu.impl.coll;
/**
* Container and access methods for collation elements and weights
* that occur in the root collator.
* Needed for finding boundaries for building a tailoring.
*
* This class takes and returns 16-bit secondary and tertiary weights.
*/
public final class CollationRootElements {
public CollationRootElements(long[] rootElements) {
elements = rootElements;
}
/**
* Higher than any root primary.
*/
public static final long PRIMARY_SENTINEL = 0xffffff00L;
/**
* Flag in a root element, set if the element contains secondary & tertiary weights,
* rather than a primary.
*/
public static final int SEC_TER_DELTA_FLAG = 0x80;
/**
* Mask for getting the primary range step value from a primary-range-end element.
*/
public static final int PRIMARY_STEP_MASK = 0x7f;
/**
* Index of the first CE with a non-zero tertiary weight.
* Same as the start of the compact root elements table.
*/
public static final int IX_FIRST_TERTIARY_INDEX = 0;
/**
* Index of the first CE with a non-zero secondary weight.
*/
static final int IX_FIRST_SECONDARY_INDEX = 1;
/**
* Index of the first CE with a non-zero primary weight.
*/
static final int IX_FIRST_PRIMARY_INDEX = 2;
/**
* Must match Collation.COMMON_SEC_AND_TER_CE.
*/
static final int IX_COMMON_SEC_AND_TER_CE = 3;
/**
* Secondary & tertiary boundaries.
* Bits 31..24: [fixed last secondary common byte 45]
* Bits 23..16: [fixed first ignorable secondary byte 80]
* Bits 15.. 8: reserved, 0
* Bits 7.. 0: [fixed first ignorable tertiary byte 3C]
*/
static final int IX_SEC_TER_BOUNDARIES = 4;
/**
* The current number of indexes.
* Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
*/
static final int IX_COUNT = 5;
/**
* Returns the boundary between tertiary weights of primary/secondary CEs
* and those of tertiary CEs.
* This is the upper limit for tertiaries of primary/secondary CEs.
* This minus one is the lower limit for tertiaries of tertiary CEs.
*/
public int getTertiaryBoundary() {
return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
}
/**
* Returns the first assigned tertiary CE.
*/
long getFirstTertiaryCE() {
return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
}
/**
* Returns the last assigned tertiary CE.
*/
long getLastTertiaryCE() {
return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
}
/**
* Returns the last common secondary weight.
* This is the lower limit for secondaries of primary CEs.
*/
public int getLastCommonSecondary() {
return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
}
/**
* Returns the boundary between secondary weights of primary CEs
* and those of secondary CEs.
* This is the upper limit for secondaries of primary CEs.
* This minus one is the lower limit for secondaries of secondary CEs.
*/
public int getSecondaryBoundary() {
return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
}
/**
* Returns the first assigned secondary CE.
*/
long getFirstSecondaryCE() {
return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
}
/**
* Returns the last assigned secondary CE.
*/
long getLastSecondaryCE() {
return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
}
/**
* Returns the first assigned primary weight.
*/
long getFirstPrimary() {
return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]; // step=0: cannot be a range end
}
/**
* Returns the first assigned primary CE.
*/
long getFirstPrimaryCE() {
return Collation.makeCE(getFirstPrimary());
}
/**
* Returns the last root CE with a primary weight before p.
* Intended only for reordering group boundaries.
*/
long lastCEWithPrimaryBefore(long p) {
if(p == 0) { return 0; }
assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
int index = findP(p);
long q = elements[index];
long secTer;
if(p == (q & 0xffffff00L)) {
// p == elements[index] is a root primary. Find the CE before it.
// We must not be in a primary range.
assert((q & PRIMARY_STEP_MASK) == 0);
secTer = elements[index - 1];
if((secTer & SEC_TER_DELTA_FLAG) == 0) {
// Primary CE just before p.
p = secTer & 0xffffff00L;
secTer = Collation.COMMON_SEC_AND_TER_CE;
} else {
// secTer = last secondary & tertiary for the previous primary
index -= 2;
for(;;) {
p = elements[index];
if((p & SEC_TER_DELTA_FLAG) == 0) {
p &= 0xffffff00L;
break;
}
--index;
}
}
} else {
// p > elements[index] which is the previous primary.
// Find the last secondary & tertiary weights for it.
p = q & 0xffffff00L;
secTer = Collation.COMMON_SEC_AND_TER_CE;
for(;;) {
q = elements[++index];
if((q & SEC_TER_DELTA_FLAG) == 0) {
// We must not be in a primary range.
assert((q & PRIMARY_STEP_MASK) == 0);
break;
}
secTer = q;
}
}
return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
}
/**
* Returns the first root CE with a primary weight of at least p.
* Intended only for reordering group boundaries.
*/
long firstCEWithPrimaryAtLeast(long p) {
if(p == 0) { return 0; }
int index = findP(p);
if(p != (elements[index] & 0xffffff00L)) {
for(;;) {
p = elements[++index];
if((p & SEC_TER_DELTA_FLAG) == 0) {
// First primary after p. We must not be in a primary range.
assert((p & PRIMARY_STEP_MASK) == 0);
break;
}
}
}
// The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
}
/**
* Returns the primary weight before p.
* p must be greater than the first root primary.
*/
long getPrimaryBefore(long p, boolean isCompressible) {
int index = findPrimary(p);
int step;
long q = elements[index];
if(p == (q & 0xffffff00L)) {
// Found p itself. Return the previous primary.
// See if p is at the end of a previous range.
step = (int)q & PRIMARY_STEP_MASK;
if(step == 0) {
// p is not at the end of a range. Look for the previous primary.
do {
p = elements[--index];
} while((p & SEC_TER_DELTA_FLAG) != 0);
return p & 0xffffff00L;
}
} else {
// p is in a range, and not at the start.
long nextElement = elements[index + 1];
assert(isEndOfPrimaryRange(nextElement));
step = (int)nextElement & PRIMARY_STEP_MASK;
}
// Return the previous range primary.
if((p & 0xffff) == 0) {
return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
} else {
return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
}
}
/** Returns the secondary weight before [p, s]. */
int getSecondaryBefore(long p, int s) {
int index;
int previousSec, sec;
if(p == 0) {
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
// Gap at the beginning of the secondary CE range.
previousSec = 0;
sec = (int)(elements[index] >> 16);
} else {
index = findPrimary(p) + 1;
previousSec = Collation.MERGE_SEPARATOR_WEIGHT16;
sec = Collation.COMMON_WEIGHT16;
}
assert(s >= sec);
while(s > sec) {
previousSec = sec;
assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
sec = (int)(elements[index++] >> 16);
}
assert(sec == s);
return previousSec;
}
/** Returns the tertiary weight before [p, s, t]. */
int getTertiaryBefore(long p, int s, int t) {
assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
int index;
int previousTer;
long secTer;
if(p == 0) {
if(s == 0) {
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
// Gap at the beginning of the tertiary CE range.
previousTer = 0;
} else {
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
}
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
} else {
index = findPrimary(p) + 1;
previousTer = Collation.MERGE_SEPARATOR_WEIGHT16;
secTer = Collation.COMMON_SEC_AND_TER_CE;
}
long st = ((long)s << 16) | t;
while(st > secTer) {
if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
}
assert(secTer == st);
return previousTer & 0xffff;
}
/**
* Finds the index of the input primary.
* p must occur as a root primary, and must not be 0.
*/
int findPrimary(long p) {
// Requirement: p must occur as a root primary.
assert((p & 0xff) == 0); // at most a 3-byte primary
int index = findP(p);
// If p is in a range, then we just assume that p is an actual primary in this range.
// (Too cumbersome/expensive to check.)
// Otherwise, it must be an exact match.
assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
return index;
}
/**
* Returns the primary weight after p where index=findPrimary(p).
* p must be at least the first root primary.
*/
long getPrimaryAfter(long p, int index, boolean isCompressible) {
assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
long q = elements[++index];
int step;
if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
// Return the next primary in this range.
if((p & 0xffff) == 0) {
return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
} else {
return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
}
} else {
// Return the next primary in the list.
while((q & SEC_TER_DELTA_FLAG) != 0) {
q = elements[++index];
}
assert((q & PRIMARY_STEP_MASK) == 0);
return q;
}
}
/**
* Returns the secondary weight after [p, s] where index=findPrimary(p)
* except use index=0 for p=0.
*/
int getSecondaryAfter(int index, int s) {
int secLimit;
if(index == 0) {
// primary = 0
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
// Gap at the end of the secondary CE range.
secLimit = 0x10000;
} else {
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
++index;
// Gap for secondaries of primary CEs.
secLimit = getSecondaryBoundary();
}
for(;;) {
long secTer = elements[index];
if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
int sec = (int)(secTer >> 16);
if(sec > s) { return sec; }
++index;
}
}
/**
* Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
* except use index=0 for p=0.
*/
int getTertiaryAfter(int index, int s, int t) {
int terLimit;
if(index == 0) {
// primary = 0
if(s == 0) {
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
// Gap at the end of the tertiary CE range.
terLimit = 0x4000;
} else {
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
// Gap for tertiaries of primary/secondary CEs.
terLimit = getTertiaryBoundary();
}
} else {
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
++index;
terLimit = getTertiaryBoundary();
}
long st = (((long)s & 0xffffffffL) << 16) | t;
for(;;) {
long secTer = elements[index];
// No tertiary greater than t for this primary+secondary.
if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
secTer &= ~SEC_TER_DELTA_FLAG;
if(secTer > st) { return (int)secTer & 0xffff; }
++index;
}
}
/**
* Finds the largest index i where elements[i]<=p.
* Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
* Does not require that p is a root collator primary.
*/
private int findP(long p) {
// p need not occur as a root primary.
// For example, it might be a reordering group boundary.
assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
// modified binary search
int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
assert(p >= elements[start]);
int limit = elements.length - 1;
assert(elements[limit] >= PRIMARY_SENTINEL);
assert(p < elements[limit]);
while((start + 1) < limit) {
// Invariant: elements[start] and elements[limit] are primaries,
// and elements[start]<=p<=elements[limit].
int i = (start + limit) / 2;
long q = elements[i];
if((q & SEC_TER_DELTA_FLAG) != 0) {
// Find the next primary.
int j = i + 1;
for(;;) {
if(j == limit) { break; }
q = elements[j];
if((q & SEC_TER_DELTA_FLAG) == 0) {
i = j;
break;
}
++j;
}
if((q & SEC_TER_DELTA_FLAG) != 0) {
// Find the preceding primary.
j = i - 1;
for(;;) {
if(j == start) { break; }
q = elements[j];
if((q & SEC_TER_DELTA_FLAG) == 0) {
i = j;
break;
}
--j;
}
if((q & SEC_TER_DELTA_FLAG) != 0) {
// No primary between start and limit.
break;
}
}
}
if(p < (q & 0xffffff00L)) { // Reset the "step" bits of a range end primary.
limit = i;
} else {
start = i;
}
}
return start;
}
private static boolean isEndOfPrimaryRange(long q) {
return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
}
/**
* Data structure: See ICU4C source/i18n/collationrootelements.h.
*/
private long[] elements;
}