All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.CharacterIteration Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
/*
 *******************************************************************************
 * Copyright (C) 2013, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.text.CharacterIterator;

import com.ibm.icu.text.UTF16;

public final class CharacterIteration {
    // disallow instantiation
    private CharacterIteration() { }

    // 32 bit Char value returned from when an iterator has run out of range.
    //     Positive value so fast case (not end, not surrogate) can be checked
    //     with a single test.
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point, and return that code point,
     *   leaving the iterator positioned at char returned.
     *   For Supplementary chars, the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The next code point.
     */
    public static int next32(CharacterIterator ci) {
        // If the current position is at a surrogate pair, move to the trail surrogate
        //   which leaves it in position for underlying iterator's next() to work.
        int c= ci.current();
        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
            c = ci.next();   
            if (cUTF16.TRAIL_SURROGATE_MAX_VALUE) {
               c = ci.previous();   
            }
        }

        // For BMP chars, this next() is the real deal.
        c = ci.next();
        
        // If we might have a lead surrogate, we need to peak ahead to get the trail 
        //  even though we don't want to really be positioned there.
        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
            c = nextTrail32(ci, c);   
        }
        
        if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
            // We got a supplementary char.  Back the iterator up to the postion
            // of the lead surrogate.
            ci.previous();   
        }
        return c;
   }

    
    // Out-of-line portion of the in-line Next32 code.
    // The call site does an initial ci.next() and calls this function
    //    if the 16 bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
    // NOTE:  we leave the underlying char iterator positioned in the
    //        middle of a surrogate pair.  ci.next() will work correctly
    //        from there, but the ci.getIndex() will be wrong, and needs
    //        adjustment.
    public static int nextTrail32(CharacterIterator ci, int lead) {
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        int retVal = lead;
        if (lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
            char  cTrail = ci.next();
            if (UTF16.isTrailSurrogate(cTrail)) {
                retVal = ((lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                            (cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                            UTF16.SUPPLEMENTARY_MIN_VALUE;
            } else {
                ci.previous();
            }
        }
        return retVal;
    }
       
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;   
        }
        char trail = ci.previous();
        int retVal = trail;
        if (UTF16.isTrailSurrogate(trail) && ci.getIndex()>ci.getBeginIndex()) {
            char lead = ci.previous();
            if (UTF16.isLeadSurrogate(lead)) {
                retVal = (((int)lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                          ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                          UTF16.SUPPLEMENTARY_MIN_VALUE;
            } else {
                ci.next();
            }           
        }
        return retVal;
    }
   
    public static int current32(CharacterIterator ci) {
        char  lead   = ci.current();
        int   retVal = lead;
        if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            return retVal;   
        }
        if (UTF16.isLeadSurrogate(lead)) {
            int  trail = (int)ci.next();
            ci.previous();
            if (UTF16.isTrailSurrogate((char)trail)) {
                retVal = ((lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                         (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                         UTF16.SUPPLEMENTARY_MIN_VALUE;
            }
         } else {
            if (lead == CharacterIterator.DONE) {
                if (ci.getIndex() >= ci.getEndIndex())   {
                    retVal = DONE32;   
                }
            }
         }
        return retVal;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy