com.ibm.icu.impl.CharacterIteration Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/*
*******************************************************************************
* Copyright (C) 2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.text.CharacterIterator;
import com.ibm.icu.text.UTF16;
public final class CharacterIteration {
// disallow instantiation
private CharacterIteration() { }
// 32 bit Char value returned from when an iterator has run out of range.
// Positive value so fast case (not end, not surrogate) can be checked
// with a single test.
public static final int DONE32 = 0x7fffffff;
/**
* Move the iterator forward to the next code point, and return that code point,
* leaving the iterator positioned at char returned.
* For Supplementary chars, the iterator is left positioned at the lead surrogate.
* @param ci The character iterator
* @return The next code point.
*/
public static int next32(CharacterIterator ci) {
// If the current position is at a surrogate pair, move to the trail surrogate
// which leaves it in position for underlying iterator's next() to work.
int c= ci.current();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
c = ci.next();
if (cUTF16.TRAIL_SURROGATE_MAX_VALUE) {
c = ci.previous();
}
}
// For BMP chars, this next() is the real deal.
c = ci.next();
// If we might have a lead surrogate, we need to peak ahead to get the trail
// even though we don't want to really be positioned there.
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(ci, c);
}
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
// We got a supplementary char. Back the iterator up to the postion
// of the lead surrogate.
ci.previous();
}
return c;
}
// Out-of-line portion of the in-line Next32 code.
// The call site does an initial ci.next() and calls this function
// if the 16 bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
// NOTE: we leave the underlying char iterator positioned in the
// middle of a surrogate pair. ci.next() will work correctly
// from there, but the ci.getIndex() will be wrong, and needs
// adjustment.
public static int nextTrail32(CharacterIterator ci, int lead) {
if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
return DONE32;
}
int retVal = lead;
if (lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
char cTrail = ci.next();
if (UTF16.isTrailSurrogate(cTrail)) {
retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
(cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
UTF16.SUPPLEMENTARY_MIN_VALUE;
} else {
ci.previous();
}
}
return retVal;
}
public static int previous32(CharacterIterator ci) {
if (ci.getIndex() <= ci.getBeginIndex()) {
return DONE32;
}
char trail = ci.previous();
int retVal = trail;
if (UTF16.isTrailSurrogate(trail) && ci.getIndex()>ci.getBeginIndex()) {
char lead = ci.previous();
if (UTF16.isLeadSurrogate(lead)) {
retVal = (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
UTF16.SUPPLEMENTARY_MIN_VALUE;
} else {
ci.next();
}
}
return retVal;
}
public static int current32(CharacterIterator ci) {
char lead = ci.current();
int retVal = lead;
if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
return retVal;
}
if (UTF16.isLeadSurrogate(lead)) {
int trail = (int)ci.next();
ci.previous();
if (UTF16.isTrailSurrogate((char)trail)) {
retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
(trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
UTF16.SUPPLEMENTARY_MIN_VALUE;
}
} else {
if (lead == CharacterIterator.DONE) {
if (ci.getIndex() >= ci.getEndIndex()) {
retVal = DONE32;
}
}
}
return retVal;
}
}