All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.text.ComposedCharIter Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;

/**
 * This class has been deprecated since ICU 2.2.
 * One problem is that this class is not designed to return supplementary characters.
 * Use the Normalizer2 and UCharacter classes instead.
 * 

* ComposedCharIter is an iterator class that returns all * of the precomposed characters defined in the Unicode standard, along * with their decomposed forms. This is often useful when building * data tables (e.g. collation tables) which need to treat composed * and decomposed characters equivalently. *

* For example, imagine that you have built a collation table with ordering * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all * characters used in a particular language. When you process input text using * this table, the text must first be decomposed so that it matches the form * used in the table. This can impose a performance penalty that may be * unacceptable in some situations. *

* You can avoid this problem by ensuring that the collation table contains * rules for both the decomposed and composed versions of each character. * To do so, use a ComposedCharIter to iterate through all of the * composed characters in Unicode. If the decomposition for that character * consists solely of characters that are listed in your ruleset, you can * add a new rule for the composed character that makes it equivalent to * its decomposition sequence. *

* Note that ComposedCharIter iterates over a static table * of the composed characters in Unicode. If you want to iterate over the * composed characters in a particular string, use {@link Normalizer} instead. *

* When constructing a ComposedCharIter there is one * optional feature that you can enable or disable: *

    *
  • {@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul * characters and their corresponding Jamo decompositions. * This option is off by default (i.e. Hangul processing is enabled) * since the Unicode standard specifies that Hangul to Jamo * is a canonical decomposition. *
*

* ComposedCharIter is currently based on version 2.1.8 of the * Unicode Standard. * It will be updated as later versions of Unicode are released. * @deprecated ICU 2.2 */ ///CLOVER:OFF public final class ComposedCharIter { /** * Constant that indicates the iteration has completed. * {@link #next} returns this value when there are no more composed characters * over which to iterate. * @deprecated ICU 2.2 */ public static final char DONE = (char) Normalizer.DONE; /** * Construct a new ComposedCharIter. The iterator will return * all Unicode characters with canonical decompositions, including Korean * Hangul characters. * @deprecated ICU 2.2 */ public ComposedCharIter() { this(false, 0); } /** * Constructs a non-default ComposedCharIter with optional behavior. *

* @param compat false for canonical decompositions only; * true for both canonical and compatibility * decompositions. * * @param options Optional decomposition features. None are supported, so this is ignored. * @deprecated ICU 2.2 */ public ComposedCharIter(boolean compat, int options) { if(compat) { n2impl = Norm2AllModes.getNFKCInstance().impl; } else { n2impl = Norm2AllModes.getNFCInstance().impl; } } /** * Determines whether there any precomposed Unicode characters not yet returned * by {@link #next}. * @deprecated ICU 2.2 */ public boolean hasNext() { if (nextChar == Normalizer.DONE) { findNextChar(); } return nextChar != Normalizer.DONE; } /** * Returns the next precomposed Unicode character. * Repeated calls to next return all of the precomposed characters defined * by Unicode, in ascending order. After all precomposed characters have * been returned, {@link #hasNext} will return false and further calls * to next will return {@link #DONE}. * @deprecated ICU 2.2 */ public char next() { if (nextChar == Normalizer.DONE) { findNextChar(); } curChar = nextChar; nextChar = Normalizer.DONE; return (char) curChar; } /** * Returns the Unicode decomposition of the current character. * This method returns the decomposition of the precomposed character most * recently returned by {@link #next}. The resulting decomposition is * affected by the settings of the options passed to the constructor. * @deprecated ICU 2.2 */ public String decomposition() { // the decomposition buffer contains the decomposition of // current char so just return it if(decompBuf != null) { return decompBuf; } else { return ""; } } private void findNextChar() { int c=curChar+1; decompBuf = null; for(;;) { if(c < 0xFFFF) { decompBuf = n2impl.getDecomposition(c); if(decompBuf != null) { // the curChar can be decomposed... 
so it is a composed char // cache the result break; } c++; } else { c=Normalizer.DONE; break; } } nextChar=c; } private final Normalizer2Impl n2impl; private String decompBuf; private int curChar = 0; private int nextChar = Normalizer.DONE; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy