All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.text.CompoundTransliterator Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.List;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.UtilityExtensions;

/**
 * A transliterator that is composed of two or more other
 * transliterator objects linked together.  For example, if one
 * transliterator transliterates from script A to script B, and
 * another transliterates from script B to script C, the two may be
 * combined to form a new transliterator from A to C.
 *
 * 

Composed transliterators may not behave as expected. For * example, inverses may not combine to form the identity * transliterator. See the class documentation for {@link * Transliterator} for details. * *

Copyright © IBM Corporation 1999. All rights reserved. * * @author Alan Liu */ class CompoundTransliterator extends Transliterator { private Transliterator[] trans; private int numAnonymousRBTs = 0; /** * Constructs a new compound transliterator given an array of * transliterators. The array of transliterators may be of any * length, including zero or one, however, useful compound * transliterators have at least two components. * @param transliterators array of Transliterator * objects * @param filter the filter. Any character for which * filter.contains() returns false will not be * altered by this transliterator. If filter is * null then no filtering is applied. */ /*public CompoundTransliterator(Transliterator[] transliterators, UnicodeFilter filter) { super(joinIDs(transliterators), filter); trans = new Transliterator[transliterators.length]; System.arraycopy(transliterators, 0, trans, 0, trans.length); computeMaximumContextLength(); }*/ /** * Constructs a new compound transliterator given an array of * transliterators. The array of transliterators may be of any * length, including zero or one, however, useful compound * transliterators have at least two components. * @param transliterators array of Transliterator * objects */ /*public CompoundTransliterator(Transliterator[] transliterators) { this(transliterators, null); }*/ /** * Constructs a new compound transliterator. * @param ID compound ID * @param direction either Transliterator.FORWARD or Transliterator.REVERSE * @param filter a global filter for this compound transliterator * or null */ /*public CompoundTransliterator(String ID, int direction, UnicodeFilter filter) { super(ID, filter); init(ID, direction, true); }*/ /** * Constructs a new compound transliterator with no filter. 
* @param ID compound ID * @param direction either Transliterator.FORWARD or Transliterator.REVERSE */ /*public CompoundTransliterator(String ID, int direction) { this(ID, direction, null); }*/ /** * Constructs a new forward compound transliterator with no filter. * @param ID compound ID */ /*public CompoundTransliterator(String ID) { this(ID, FORWARD, null); }*/ /** * Package private constructor for Transliterator from a vector of * transliterators. The caller is responsible for fixing up the * ID. */ CompoundTransliterator(List list) { this(list, 0); } CompoundTransliterator(List list, int numAnonymousRBTs) { super("", null); trans = null; init(list, FORWARD, false); this.numAnonymousRBTs = numAnonymousRBTs; // assume caller will fixup ID } /** * Internal method for safeClone... * @param id * @param filter2 * @param trans2 * @param numAnonymousRBTs2 */ CompoundTransliterator(String id, UnicodeFilter filter2, Transliterator[] trans2, int numAnonymousRBTs2) { super(id, filter2); trans = trans2; numAnonymousRBTs = numAnonymousRBTs2; } /** * Finish constructing a transliterator: only to be called by * constructors. Before calling init(), set trans and filter to NULL. * @param id the id containing ';'-separated entries * @param direction either FORWARD or REVERSE * @param idSplitPoint the index into id at which the * splitTrans should be inserted, if there is one, or * -1 if there is none. * @param splitTrans a transliterator to be inserted * before the entry at offset idSplitPoint in the id string. May be * NULL to insert no entry. * @param fixReverseID if TRUE, then reconstruct the ID of reverse * entries by calling getID() of component entries. Some constructors * do not require this because they apply a facade ID anyway. 
*/ /*private void init(String id, int direction, boolean fixReverseID) { // assert(trans == 0); Vector list = new Vector(); UnicodeSet[] compoundFilter = new UnicodeSet[1]; StringBuffer regenID = new StringBuffer(); if (!TransliteratorIDParser.parseCompoundID(id, direction, regenID, list, compoundFilter)) { throw new IllegalArgumentException("Invalid ID " + id); } TransliteratorIDParser.instantiateList(list); init(list, direction, fixReverseID); if (compoundFilter[0] != null) { setFilter(compoundFilter[0]); } }*/ /** * Finish constructing a transliterator: only to be called by * constructors. Before calling init(), set trans and filter to NULL. * @param list a vector of transliterator objects to be adopted. It * should NOT be empty. The list should be in declared order. That * is, it should be in the FORWARD order; if direction is REVERSE then * the list order will be reversed. * @param direction either FORWARD or REVERSE * @param fixReverseID if TRUE, then reconstruct the ID of reverse * entries by calling getID() of component entries. Some constructors * do not require this because they apply a facade ID anyway. */ private void init(List list, int direction, boolean fixReverseID) { // assert(trans == 0); // Allocate array int count = list.size(); trans = new Transliterator[count]; // Move the transliterators from the vector into an array. // Reverse the order if necessary. int i; for (i=0; i 0) { newID.append(ID_DELIM); } newID.append(trans[i].getID()); } setID(newID.toString()); } computeMaximumContextLength(); } /** * Return the IDs of the given list of transliterators, concatenated * with ';' delimiting them. Equivalent to the perlish expression * join(';', map($_.getID(), transliterators). */ /*private static String joinIDs(Transliterator[] transliterators) { StringBuffer id = new StringBuffer(); for (int i=0; i 0) { id.append(';'); } id.append(transliterators[i].getID()); } return id.toString(); }*/ /** * Returns the number of transliterators in this chain. 
* @return number of transliterators in this chain. */ public int getCount() { return trans.length; } /** * Returns the transliterator at the given index in this chain. * @param index index into chain, from 0 to getCount() - 1 * @return transliterator at the given index */ public Transliterator getTransliterator(int index) { return trans[index]; } /** * Append c to buf, unless buf is empty or buf already ends in c. */ private static void _smartAppend(StringBuilder buf, char c) { if (buf.length() != 0 && buf.charAt(buf.length() - 1) != c) { buf.append(c); } } /** * Override Transliterator: * Create a rule string that can be passed to createFromRules() * to recreate this transliterator. * @param escapeUnprintable if TRUE then convert unprintable * character to their hex escape representations, \\uxxxx or * \\Uxxxxxxxx. Unprintable characters are those other than * U+000A, U+0020..U+007E. * @return the rule string */ public String toRules(boolean escapeUnprintable) { // We do NOT call toRules() on our component transliterators, in // general. If we have several rule-based transliterators, this // yields a concatenation of the rules -- not what we want. We do // handle compound RBT transliterators specially -- those for which // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, // we do call toRules() recursively. StringBuilder rulesSource = new StringBuilder(); if (numAnonymousRBTs >= 1 && getFilter() != null) { // If we are a compound RBT and if we have a global // filter, then emit it at the top. 
rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM); } for (int i=0; i 1 && i > 0 && trans[i - 1].getID().startsWith("%Pass")) rule = "::Null;" + rule; // we also use toRules() on CompoundTransliterators (which we // check for by looking for a semicolon in the ID)-- this gets // the list of their child transliterators output in the right // format } else if (trans[i].getID().indexOf(';') >= 0) { rule = trans[i].toRules(escapeUnprintable); // for everything else, use baseToRules() } else { rule = trans[i].baseToRules(escapeUnprintable); } _smartAppend(rulesSource, '\n'); rulesSource.append(rule); _smartAppend(rulesSource, ID_DELIM); } return rulesSource.toString(); } /** * @internal */ @Override public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = new UnicodeSet(getFilterAsUnicodeSet(filter)); UnicodeSet tempTargetSet = new UnicodeSet(); for (int i=0; i abca/u * C S L C S L gl=f->a * * 2. upup, changes "x" to "XX" * * 4 7 a 4 7 a * abca/u => abcAA/u * C SL C S * L gl=a->b * 3. u-h, changes Unicode to hex * * 4 7 a 4 7 a d 0 3 * abcAA/u => abc/u0041/u0041/u * C S L C S * L gl=b->15 * 4. return * * 4 7 a d 0 3 * abc/u0041/u0041/u * C S L */ if (trans.length < 1) { index.start = index.limit; return; // Short circuit for empty compound transliterators } // compoundLimit is the limit value for the entire compound // operation. We overwrite index.limit with the previous // index.start. After each transliteration, we update // compoundLimit for insertions or deletions that have happened. int compoundLimit = index.limit; // compoundStart is the start for the entire compound // operation. int compoundStart = index.start; int delta = 0; // delta in length StringBuffer log = null; ///CLOVER:OFF if (DEBUG) { log = new StringBuffer("CompoundTransliterator{" + getID() + (incremental ? 
"}i: IN=" : "}: IN=")); UtilityExtensions.formatInput(log, text, index); System.out.println(Utility.escape(log.toString())); } ///CLOVER:ON // Give each transliterator a crack at the run of characters. // See comments at the top of the method for more detail. for (int i=0; i "); UtilityExtensions.formatInput(log, text, index); System.out.println(Utility.escape(log.toString())); } ///CLOVER:ON // Cumulative delta for insertions/deletions delta += index.limit - limit; if (incremental) { // In the incremental case, only allow subsequent // transliterators to modify what has already been // completely processed by prior transliterators. In the // non-incrmental case, allow each transliterator to // process the entire text. index.limit = index.start; } } compoundLimit += delta; // Start is good where it is -- where the last transliterator left // it. Limit needs to be put back where it was, modulo // adjustments for deletions/insertions. index.limit = compoundLimit; ///CLOVER:OFF if (DEBUG) { log.setLength(0); log.append("CompoundTransliterator{" + getID() + (incremental ? "}i: OUT=" : "}: OUT=")); UtilityExtensions.formatInput(log, text, index); System.out.println(Utility.escape(log.toString())); } ///CLOVER:ON } /** * Compute and set the length of the longest context required by this transliterator. * This is preceding context. */ private void computeMaximumContextLength() { int max = 0; for (int i=0; i max) { max = len; } } setMaximumContextLength(max); } /** * Temporary hack for registry problem. Needs to be replaced by better architecture. */ public Transliterator safeClone() { UnicodeFilter filter = getFilter(); if (filter != null && filter instanceof UnicodeSet) { filter = new UnicodeSet((UnicodeSet)filter); } return new CompoundTransliterator(getID(), filter, trans, numAnonymousRBTs); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy