com.ibm.icu.text.CompoundTransliterator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.
There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.List;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.UtilityExtensions;

/**
 * A transliterator that is composed of two or more other
 * transliterator objects linked together.  For example, if one
 * transliterator transliterates from script A to script B, and
 * another transliterates from script B to script C, the two may be
 * combined to form a new transliterator from A to C.
 *
 * Composed transliterators may not behave as expected.  For
 * example, inverses may not combine to form the identity
 * transliterator.  See the class documentation for {@link
 * Transliterator} for details.
 *
 * Copyright © IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
 */
class CompoundTransliterator extends Transliterator {

    private Transliterator[] trans;

    private int numAnonymousRBTs = 0;

    /**
     * Constructs a new compound transliterator given an array of
     * transliterators.  The array of transliterators may be of any
     * length, including zero or one, however, useful compound
     * transliterators have at least two components.
     * @param transliterators array of Transliterator
     * objects
     * @param filter the filter.  Any character for which
     * filter.contains() returns false will not be
     * altered by this transliterator.  If filter is
     * null then no filtering is applied.
     */
    /*public CompoundTransliterator(Transliterator[] transliterators,
                                  UnicodeFilter filter) {
        super(joinIDs(transliterators), filter);
        trans = new Transliterator[transliterators.length];
        System.arraycopy(transliterators, 0, trans, 0, trans.length);
        computeMaximumContextLength();
    }*/

    /**
     * Constructs a new compound transliterator given an array of
     * transliterators.  The array of transliterators may be of any
     * length, including zero or one, however, useful compound
     * transliterators have at least two components.
     * @param transliterators array of Transliterator
     * objects
     */
    /*public CompoundTransliterator(Transliterator[] transliterators) {
        this(transliterators, null);
    }*/

    /**
     * Constructs a new compound transliterator.
     * @param ID compound ID
     * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
     * @param filter a global filter for this compound transliterator
     * or null
     */
    /*public CompoundTransliterator(String ID, int direction,
                                  UnicodeFilter filter) {
        super(ID, filter);
        init(ID, direction, true);
    }*/

    /**
     * Constructs a new compound transliterator with no filter.
     * @param ID compound ID
     * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
     */
    /*public CompoundTransliterator(String ID, int direction) {
        this(ID, direction, null);
    }*/

    /**
     * Constructs a new forward compound transliterator with no filter.
     * @param ID compound ID
     */
    /*public CompoundTransliterator(String ID) {
        this(ID, FORWARD, null);
    }*/

    /**
     * Package private constructor for Transliterator from a list of
     * transliterators.  Delegates to the two-argument constructor with
     * zero anonymous rule-based transliterators.  The caller is
     * responsible for fixing up the ID.
     * @param list the component transliterators, in FORWARD order
     */
    CompoundTransliterator(List<Transliterator> list) {
        this(list, 0);
    }

    /**
     * Package private constructor for Transliterator from a list of
     * transliterators.  The instance is created with an empty ID and no
     * filter; the caller is responsible for fixing up the ID.
     * @param list the component transliterators, in FORWARD order
     * @param numAnonymousRBTs value stored in the numAnonymousRBTs field,
     * which toRules() consults when deciding whether to emit the global
     * filter and "::Null;" separators
     */
    CompoundTransliterator(List<Transliterator> list, int numAnonymousRBTs) {
        super("", null);
        trans = null;
        init(list, FORWARD, false);
        this.numAnonymousRBTs = numAnonymousRBTs;
        // assume caller will fixup ID
    }

    /**
     * Internal constructor used by {@link #safeClone()}.
     * @param id the ID handed to the superclass constructor
     * @param filter2 the filter handed to the superclass constructor
     * @param trans2 the component array; it is stored by reference, not
     * copied
     * @param numAnonymousRBTs2 value for the numAnonymousRBTs field
     */
    CompoundTransliterator(String id, UnicodeFilter filter2, Transliterator[] trans2, int numAnonymousRBTs2) {
        super(id, filter2);
        this.numAnonymousRBTs = numAnonymousRBTs2;
        this.trans = trans2;
    }

    /**
     * Finish constructing a transliterator: only to be called by
     * constructors.  Before calling init(), set trans and filter to NULL.
     * @param id the id containing ';'-separated entries
     * @param direction either FORWARD or REVERSE
     * @param idSplitPoint the index into id at which the
     * splitTrans should be inserted, if there is one, or
     * -1 if there is none.
     * @param splitTrans a transliterator to be inserted
     * before the entry at offset idSplitPoint in the id string.  May be
     * NULL to insert no entry.
     * @param fixReverseID if true, then reconstruct the ID of reverse
     * entries by calling getID() of component entries.  Some constructors
     * do not require this because they apply a facade ID anyway.
     */
    /*private void init(String id,
                      int direction,
                      boolean fixReverseID) {
        // assert(trans == 0);

        Vector list = new Vector();
        UnicodeSet[] compoundFilter = new UnicodeSet[1];
        StringBuffer regenID = new StringBuffer();
        if (!TransliteratorIDParser.parseCompoundID(id, direction,
                 regenID, list, compoundFilter)) {
            throw new IllegalArgumentException("Invalid ID " + id);
        }

        TransliteratorIDParser.instantiateList(list);

        init(list, direction, fixReverseID);

        if (compoundFilter[0] != null) {
            setFilter(compoundFilter[0]);
        }
    }*/


    /**
     * Finish constructing a transliterator: only to be called by
     * constructors.  Before calling init(), set trans and filter to NULL.
     * @param list a vector of transliterator objects to be adopted.  It
     * should NOT be empty.  The list should be in declared order.  That
     * is, it should be in the FORWARD order; if direction is REVERSE then
     * the list order will be reversed.
     * @param direction either FORWARD or REVERSE
     * @param fixReverseID if true, then reconstruct the ID of reverse
     * entries by calling getID() of component entries.  Some constructors
     * do not require this because they apply a facade ID anyway.
     */
    private void init(List list,
                      int direction,
                      boolean fixReverseID) {
        // assert(trans == 0);

        // Allocate array
        int count = list.size();
        trans = new Transliterator[count];

        // Move the transliterators from the vector into an array.
        // Reverse the order if necessary.
        int i;
        for (i=0; i 0) {
                    newID.append(ID_DELIM);
                }
                newID.append(trans[i].getID());
            }
            setID(newID.toString());
        }

        computeMaximumContextLength();
    }

    /**
     * Return the IDs of the given list of transliterators, concatenated
     * with ';' delimiting them.  Equivalent to the perlish expression
     * join(';', map($_.getID(), transliterators).
     */
    /*private static String joinIDs(Transliterator[] transliterators) {
        StringBuffer id = new StringBuffer();
        for (int i=0; i<transliterators.length; ++i) {
            if (i > 0) {
                id.append(';');
            }
            id.append(transliterators[i].getID());
        }
        return id.toString();
    }*/

    /**
     * Reports how many component transliterators make up this chain.
     * @return the length of the internal component array
     */
    public int getCount() {
        final Transliterator[] chain = trans;
        return chain.length;
    }

    /**
     * Looks up one component of this chain by position.
     * @param index position in the chain, from 0 to getCount() - 1
     * @return the component transliterator stored at that position
     */
    public Transliterator getTransliterator(int index) {
        final Transliterator component = trans[index];
        return component;
    }

    /**
     * Appends c to buf only when buf is non-empty and its last
     * character is not already c; otherwise buf is left untouched.
     */
    private static void _smartAppend(StringBuilder buf, char c) {
        final int length = buf.length();
        if (length == 0) {
            return; // never start the buffer with a delimiter
        }
        if (buf.charAt(length - 1) == c) {
            return; // delimiter already present, do not double it
        }
        buf.append(c);
    }

    /**
     * Override Transliterator:
     * Create a rule string that can be passed to createFromRules()
     * to recreate this transliterator.
     * @param escapeUnprintable if true then convert unprintable
     * character to their hex escape representations, \\uxxxx or
     * \\Uxxxxxxxx.  Unprintable characters are those other than
     * U+000A, U+0020..U+007E.
     * @return the rule string
     */
    @Override
    public String toRules(boolean escapeUnprintable) {
        // We do NOT call toRules() on our component transliterators, in
        // general.  If we have several rule-based transliterators, this
        // yields a concatenation of the rules -- not what we want.  We do
        // handle compound RBT transliterators specially -- those for which
        // compoundRBTIndex >= 0.  For the transliterator at compoundRBTIndex,
        // we do call toRules() recursively.
        StringBuilder rulesSource = new StringBuilder();
        if (numAnonymousRBTs >= 1 && getFilter() != null) {
            // If we are a compound RBT and if we have a global
            // filter, then emit it at the top.
            rulesSource.append("::").append(getFilter().toPattern(escapeUnprintable)).append(ID_DELIM);
        }
        for (int i=0; i 1 && i > 0 && trans[i - 1].getID().startsWith("%Pass"))
                    rule = "::Null;" + rule;

            // we also use toRules() on CompoundTransliterators (which we
            // check for by looking for a semicolon in the ID)-- this gets
            // the list of their child transliterators output in the right
            // format
            } else if (trans[i].getID().indexOf(';') >= 0) {
                rule = trans[i].toRules(escapeUnprintable);

            // for everything else, use baseToRules()
            } else {
                rule = trans[i].baseToRules(escapeUnprintable);
            }
            _smartAppend(rulesSource, '\n');
            rulesSource.append(rule);
            _smartAppend(rulesSource, ID_DELIM);
        }
        return rulesSource.toString();
    }

    /**
     * @internal
     */
    @Override
    public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
        UnicodeSet myFilter = new UnicodeSet(getFilterAsUnicodeSet(filter));
        UnicodeSet tempTargetSet = new UnicodeSet();
        for (int i=0; i  abca/u
         *    C  S       L       C   S L   gl=f->a
         *
         * 2. upup, changes "x" to "XX"
         *
         *    4  7  a       4  7  a
         *    abca/u    =>  abcAA/u
         *    C  SL         C    S
         *                       L    gl=a->b
         * 3. u-h, changes Unicode to hex
         *
         *    4  7  a        4  7  a  d  0  3
         *    abcAA/u    =>  abc/u0041/u0041/u
         *    C  S L         C              S
         *                                  L   gl=b->15
         * 4. return
         *
         *    4  7  a  d  0  3
         *    abc/u0041/u0041/u
         *    C S L
         */

        if (trans.length < 1) {
            index.start = index.limit;
            return; // Short circuit for empty compound transliterators
        }

        // compoundLimit is the limit value for the entire compound
        // operation.  We overwrite index.limit with the previous
        // index.start.  After each transliteration, we update
        // compoundLimit for insertions or deletions that have happened.
        int compoundLimit = index.limit;

        // compoundStart is the start for the entire compound
        // operation.
        int compoundStart = index.start;

        int delta = 0; // delta in length

        StringBuffer log = null;
        ///CLOVER:OFF
        if (DEBUG) {
            log = new StringBuffer("CompoundTransliterator{" + getID() +
                                   (incremental ? "}i: IN=" : "}: IN="));
            UtilityExtensions.formatInput(log, text, index);
            System.out.println(Utility.escape(log.toString()));
        }
        ///CLOVER:ON

        // Give each transliterator a crack at the run of characters.
        // See comments at the top of the method for more detail.
        for (int i=0; i ");
                UtilityExtensions.formatInput(log, text, index);
                System.out.println(Utility.escape(log.toString()));
            }
            ///CLOVER:ON

            // Cumulative delta for insertions/deletions
            delta += index.limit - limit;

            if (incremental) {
                // In the incremental case, only allow subsequent
                // transliterators to modify what has already been
                // completely processed by prior transliterators.  In the
                // non-incrmental case, allow each transliterator to
                // process the entire text.
                index.limit = index.start;
            }
        }

        compoundLimit += delta;

        // Start is good where it is -- where the last transliterator left
        // it.  Limit needs to be put back where it was, modulo
        // adjustments for deletions/insertions.
        index.limit = compoundLimit;

        ///CLOVER:OFF
        if (DEBUG) {
            log.setLength(0);
            log.append("CompoundTransliterator{" + getID() +
                       (incremental ? "}i: OUT=" : "}: OUT="));
            UtilityExtensions.formatInput(log, text, index);
            System.out.println(Utility.escape(log.toString()));
        }
        ///CLOVER:ON
    }

    /**
     * Compute and set the length of the longest context required by this transliterator.
     * This is preceding context.
     */
    private void computeMaximumContextLength() {
        int max = 0;
        for (int i=0; i max) {
                max = len;
            }
        }
        setMaximumContextLength(max);
    }

    /**
     * Temporary hack for registry problem. Needs to be replaced by better architecture.
     * Builds a new CompoundTransliterator with the same ID and
     * numAnonymousRBTs.  A filter that is a UnicodeSet is copied; any
     * other filter, and the component array itself, are passed through
     * by reference.
     * @return the new transliterator
     */
    public Transliterator safeClone() {
        UnicodeFilter clonedFilter = getFilter();
        // instanceof is false for null, so no separate null check is needed
        if (clonedFilter instanceof UnicodeSet) {
            UnicodeSet original = (UnicodeSet) clonedFilter;
            clonedFilter = new UnicodeSet(original);
        }
        return new CompoundTransliterator(getID(), clonedFilter, trans, numAnonymousRBTs);
    }
}