All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.text.TransliteratorRegistry Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

The newest version!
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   08/19/2001  aliu        Creation.
**********************************************************************
*/

package com.ibm.icu.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.Set;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.RuleBasedTransliterator.Data;
import com.ibm.icu.util.CaseInsensitiveString;
import com.ibm.icu.util.UResourceBundle;

class TransliteratorRegistry {

    // char constants
    // Separator searched for (last occurrence) when a locale spec is shortened
    // to its next fallback level in Spec.setupNext().
    private static final char LOCALE_SEP  = '_';

    // String constants
    // Variant name that stands for "no variant".
    private static final String NO_VARIANT = ""; // empty string
    // Source name substituted when an entry is registered with an empty source.
    private static final String ANY = "Any";

    /**
     * Dynamic registry mapping full IDs to Entry objects.  This
     * contains both public and internal entities.  The visibility is
     * controlled by whether an entry is listed in availableIDs and
     * specDAG or not.
     *
     * Keys are CaseInsensitiveString objects.
     * Values are objects of class Class (subclass of Transliterator),
     * RuleBasedTransliterator.Data, Transliterator.Factory, or one
     * of the entry classes defined here (AliasEntry or ResourceEntry).
     */
    private Map registry;

    /**
     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
     * target => (Vector: variant)) The Vector of variants is never
     * empty.  For a source-target with no variant, the special
     * variant NO_VARIANT (the empty string) is stored in slot zero of
     * the UVector.
     *
     * Keys are CaseInsensitiveString objects.
     * Values are Hashtable of (CaseInsensitiveString -> Vector of
     * CaseInsensitiveString)
     */
    private Map>> specDAG;

    /**
     * Vector of public full IDs (CaseInsensitiveString objects).
     */
    private final Set availableIDs;

    //----------------------------------------------------------------------
    // class Spec
    //----------------------------------------------------------------------

    /**
     * A Spec is a string naming either a source or a target.  (A variant
     * could be described the same way, but this class is only used for
     * sources and targets.)
     *
     * A Spec is treated as a locale or as a script.  A locale spec has a
     * fallback chain of the form xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
     * ssss is the script name mapped from the top spec.  The methods
     * hasFallback(), next(), and reset() walk this chain.
     *
     * The script equivalent of the top spec is kept under the name
     * returned by UScript.getName(), and only when it differs from the
     * top spec (ignoring case).
     */
    static class Spec {

        private String top;           // spec as originally supplied
        private String spec;          // spec at the current fallback level
        private String nextSpec;      // spec at the next fallback level, or null
        private String scriptName;    // script name for top; null if none or same as top
        private boolean isSpecLocale; // whether spec is a locale
        private boolean isNextLocale; // whether nextSpec is a locale
        private ICUResourceBundle res;

        public Spec(String theSpec) {
            top = theSpec;
            spec = null;
            scriptName = null;
            try {
                // A value other than UScript.INVALID_CODE means that top
                // is itself a script name.
                int scriptCode = UScript.getCodeFromName(top);

                // Script name canonicalization -or- locale->script mapping
                int[] codes = UScript.getCode(top);
                if (codes != null) {
                    scriptName = UScript.getName(codes[0]);
                    // A script name equal to top carries no extra information
                    if (scriptName.equalsIgnoreCase(top)) {
                        scriptName = null;
                    }
                }

                isSpecLocale = false;
                res = null;
                // Only attempt a locale lookup when top is not a script name
                if (scriptCode == UScript.INVALID_CODE) {
                    Locale topLocale = LocaleUtility.getLocaleFromName(top);
                    res = (ICUResourceBundle) UResourceBundle.getBundleInstance(
                            ICUData.ICU_TRANSLIT_BASE_NAME, topLocale);
                    // Accept the bundle only if it is the one asked for
                    // (or one of its fallbacks)
                    if (res != null) {
                        String bundleName = res.getULocale().toString();
                        if (LocaleUtility.isFallbackOf(bundleName, top)) {
                            isSpecLocale = true;
                        }
                    }
                }
            } catch (MissingResourceException e) {
                ///CLOVER:OFF
                // The constructor is called from multiple private methods
                //  that protects an invalid scriptName
                scriptName = null;
                ///CLOVER:ON
            }
            // assert(spec != top);
            reset();
        }

        /** Returns true if there is a further fallback level after the current one. */
        public boolean hasFallback() {
            return nextSpec != null;
        }

        /** Rewinds the iteration to the top spec, unless it is already there. */
        public void reset() {
            if (Utility.sameObjects(spec, top)) {
                return;
            }
            spec = top;
            isSpecLocale = (res != null);
            setupNext();
        }

        /** Computes nextSpec and isNextLocale from the current level. */
        private void setupNext() {
            isNextLocale = false;
            if (!isSpecLocale) {
                // Fall back to the script (possibly null) once, then stop
                nextSpec = Utility.sameObjects(nextSpec, scriptName) ? null : scriptName;
                return;
            }
            nextSpec = spec;
            int sep = nextSpec.lastIndexOf(LOCALE_SEP);
            if (sep > 0) {
                nextSpec = spec.substring(0, sep);
                isNextLocale = true;
            } else {
                // No separator, or a leading one (_FOO): go to the script
                // name, which may be null.
                nextSpec = scriptName;
            }
        }

        // Protocol:
        // for(String& s(spec.get());
        //     spec.hasFallback(); s(spec.next())) { ...

        /** Advances to the next fallback level and returns the new current spec. */
        public String next() {
            spec = nextSpec;
            isSpecLocale = isNextLocale;
            setupNext();
            return spec;
        }

        /** Returns the spec at the current level of iteration. */
        public String get() {
            return spec;
        }

        /** Returns true if the spec at the current level is a locale. */
        public boolean isLocale() {
            return isSpecLocale;
        }

        /**
         * Returns the resource bundle for this spec if the bundle's
         * locale name equals the spec at the current level of iteration
         * (which moves from aa_BB_CCC to aa_BB to aa).  Returns null
         * if there is no bundle or the names do not match.
         */
        public ResourceBundle getBundle() {
            if (res == null) {
                return null;
            }
            boolean atBundleLevel = res.getULocale().toString().equals(spec);
            return atBundleLevel ? res : null;
        }

        /** Returns the spec as originally supplied to the constructor. */
        public String getTop() {
            return top;
        }
    }

    //----------------------------------------------------------------------
    // Entry classes
    //----------------------------------------------------------------------

    /** Entry pairing a resource name with a direction. */
    static class ResourceEntry {
        public String resource;
        public int direction;

        public ResourceEntry(String resourceName, int dir) {
            this.resource = resourceName;
            this.direction = dir;
        }
    }

    /** Entry holding a rule taken from a locale resource bundle, plus its direction. */
    static class LocaleEntry {
        public String rule;
        public int direction;

        public LocaleEntry(String ruleText, int dir) {
            this.rule = ruleText;
            this.direction = dir;
        }
    }

    /** Entry holding the alias ID that another ID maps to. */
    static class AliasEntry {
        public String alias;

        public AliasEntry(String aliasID) {
            this.alias = aliasID;
        }
    }

    /**
     * Entry describing a compound transliterator that is assembled from
     * ID blocks interleaved with rule-based passes.
     */
    static class CompoundRBTEntry {
        private final String ID;
        private final List<String> idBlockVector;
        private final List<Data> dataVector;
        private final UnicodeSet compoundFilter;

        /**
         * @param theID ID assigned to every instance built by getInstance()
         * @param theIDBlockVector ID blocks; empty strings are skipped
         * @param theDataVector rule data, one element per rule-based pass
         * @param theCompoundFilter filter set on the result, or null for none
         */
        public CompoundRBTEntry(String theID, List<String> theIDBlockVector,
                                List<Data> theDataVector,
                                UnicodeSet theCompoundFilter) {
            ID = theID;
            idBlockVector = theIDBlockVector;
            dataVector = theDataVector;
            compoundFilter = theCompoundFilter;
        }

        /**
         * Builds a new CompoundTransliterator.  For each index i, the
         * transliterator for ID block i (if non-empty) is added first,
         * followed by a RuleBasedTransliterator named "%Pass" plus a
         * running pass number for data element i.
         */
        public Transliterator getInstance() {
            List<Transliterator> transliterators = new ArrayList<>();
            int passNumber = 1;

            // The two lists may differ in length; iterate over the longer one.
            int limit = Math.max(idBlockVector.size(), dataVector.size());
            for (int i = 0; i < limit; i++) {
                if (i < idBlockVector.size()) {
                    String idBlock = idBlockVector.get(i);
                    if (idBlock.length() > 0) {
                        transliterators.add(Transliterator.getInstance(idBlock));
                    }
                }
                if (i < dataVector.size()) {
                    Data data = dataVector.get(i);
                    transliterators.add(new RuleBasedTransliterator("%Pass" + passNumber++, data, null));
                }
            }

            // passNumber started at 1, so passNumber - 1 is the number of
            // rule-based passes that were added.
            Transliterator t = new CompoundTransliterator(transliterators, passNumber - 1);
            t.setID(ID);
            if (compoundFilter != null) {
                t.setFilter(compoundFilter);
            }
            return t;
        }
    }

    //----------------------------------------------------------------------
    // class TransliteratorRegistry: Basic public API
    //----------------------------------------------------------------------

    /**
     * Creates an empty registry.  The ID map and the spec DAG are wrapped
     * with Collections.synchronizedMap(); the set of available IDs is a
     * plain LinkedHashSet.
     */
    public TransliteratorRegistry() {
        registry = Collections.synchronizedMap(new HashMap<CaseInsensitiveString, Object[]>());
        specDAG = Collections.synchronizedMap(
                new HashMap<CaseInsensitiveString, Map<CaseInsensitiveString, List<CaseInsensitiveString>>>());
        availableIDs = new LinkedHashSet<>();
    }

    /**
     * Given a simple ID (forward direction, no inline filter, not
     * compound), looks it up with find() and, if an entry exists,
     * hands it to instantiateEntry().  Returns null when find()
     * yields no entry.
     *
     * aliasReturn is passed through to instantiateEntry(); per the
     * original contract it receives a non-empty value when the ID
     * points to an alias, which the caller must resolve itself because
     * the alias may contain filters or compounds.  Callers should make
     * aliasReturn empty before calling.
     */
    public Transliterator get(String ID,
                              StringBuffer aliasReturn) {
        Object[] entry = find(ID);
        if (entry == null) {
            return null;
        }
        return instantiateEntry(ID, entry, aliasReturn);
    }

    /**
     * Register a class.  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     *
     * @param ID the ID to register under
     * @param transliteratorSubclass the Transliterator subclass to store
     * @param visible whether the ID is listed among the available IDs
     */
    public void put(String ID,
                    Class<? extends Transliterator> transliteratorSubclass,
                    boolean visible) {
        registerEntry(ID, transliteratorSubclass, visible);
    }

    /**
     * Registers a factory under the given ID.  The entry is added to the
     * dynamic store, replacing any entry already stored for that ID.
     * Per the original contract, an entry for the same ID in the static
     * locale resource store is masked.
     */
    public void put(String ID, Transliterator.Factory factory, boolean visible) {
        registerEntry(ID, factory, visible);
    }

    /**
     * Registers a resource name and direction under the given ID, wrapped
     * in a ResourceEntry.  The entry is added to the dynamic store,
     * replacing any entry already stored for that ID.  Per the original
     * contract, an entry for the same ID in the static locale resource
     * store is masked.
     */
    public void put(String ID, String resourceName, int dir, boolean visible) {
        ResourceEntry resourceEntry = new ResourceEntry(resourceName, dir);
        registerEntry(ID, resourceEntry, visible);
    }

    /**
     * Registers an alias ID under the given ID, wrapped in an AliasEntry.
     * The entry is added to the dynamic store, replacing any entry
     * already stored for that ID.  Per the original contract, an entry
     * for the same ID in the static locale resource store is masked.
     */
    public void put(String ID, String alias, boolean visible) {
        AliasEntry aliasEntry = new AliasEntry(alias);
        registerEntry(ID, aliasEntry, visible);
    }

    /**
     * Registers a Transliterator object under the given ID.  The entry
     * is added to the dynamic store, replacing any entry already stored
     * for that ID.  Per the original contract, an entry for the same ID
     * in the static locale resource store is masked.
     */
    public void put(String ID, Transliterator trans, boolean visible) {
        registerEntry(ID, trans, visible);
    }

    /**
     * Unregisters an ID: removes its entry from the dynamic store (if
     * present), its source-target/variant from the spec DAG, and the ID
     * from the set of available IDs.  The static locale resource store
     * is not touched.
     */
    public void remove(String ID) {
        String[] parts = TransliteratorIDParser.IDtoSTV(ID);
        String source = parts[0];
        String target = parts[1];
        String variant = parts[2];
        // Rebuild the ID from its parts (only needed if ID.indexOf('-') < 0)
        String rebuiltID = TransliteratorIDParser.STVtoID(source, target, variant);
        CaseInsensitiveString key = new CaseInsensitiveString(rebuiltID);
        registry.remove(key);
        removeSTV(source, target, variant);
        availableIDs.remove(key);
    }

    //----------------------------------------------------------------------
    // class TransliteratorRegistry: Public ID and spec management
    //----------------------------------------------------------------------

    /**
     * An internal class that adapts an enumeration over
     * CaseInsensitiveStrings to an enumeration over Strings.
     * A null underlying enumeration is treated as empty by
     * hasMoreElements().
     */
    private static class IDEnumeration implements Enumeration<String> {
        private final Enumeration<CaseInsensitiveString> en;

        /**
         * @param e the enumeration to adapt; may be null
         */
        public IDEnumeration(Enumeration<CaseInsensitiveString> e) {
            en = e;
        }

        @Override
        public boolean hasMoreElements() {
            return en != null && en.hasMoreElements();
        }

        @Override
        public String nextElement() {
            return en.nextElement().getString();
        }
    }

    /**
     * Returns an enumeration over the programmatic names of visible
     * registered transliterators.
     *
     * @return An Enumeration over String objects
     */
    public Enumeration<String> getAvailableIDs() {
        // Since the cache contains CaseInsensitiveString objects, but
        // the caller expects Strings, we have to use an intermediary.
        return new IDEnumeration(Collections.enumeration(availableIDs));
    }

    /**
     * Returns an enumeration over all visible source names, i.e. the
     * keys of the spec DAG.
     *
     * @return An Enumeration over String objects
     */
    public Enumeration<String> getAvailableSources() {
        return new IDEnumeration(Collections.enumeration(specDAG.keySet()));
    }

    /**
     * Returns an enumeration over visible target names for the given
     * source.  The enumeration is empty if the source is not in the
     * spec DAG.
     *
     * @return An Enumeration over String objects
     */
    public Enumeration<String> getAvailableTargets(String source) {
        CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
        Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
        if (targets == null) {
            return new IDEnumeration(null);
        }
        return new IDEnumeration(Collections.enumeration(targets.keySet()));
    }

    /**
     * Returns an enumeration over visible variant names for the given
     * source and target.  The enumeration is empty if the source or the
     * source-target pair is not in the spec DAG.
     *
     * @return An Enumeration over String objects
     */
    public Enumeration<String> getAvailableVariants(String source, String target) {
        CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
        CaseInsensitiveString citrg = new CaseInsensitiveString(target);
        Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
        if (targets == null) {
            return new IDEnumeration(null);
        }
        List<CaseInsensitiveString> variants = targets.get(citrg);
        if (variants == null) {
            return new IDEnumeration(null);
        }
        return new IDEnumeration(Collections.enumeration(variants));
    }

    //----------------------------------------------------------------------
    // class TransliteratorRegistry: internal
    //----------------------------------------------------------------------

    /**
     * Convenience overload taking source, target and variant instead of
     * an ID.  Builds the ID from the three parts, substitutes ANY for an
     * empty source, and delegates to the 6-arg registerEntry().
     */
    private void registerEntry(String source,
                               String target,
                               String variant,
                               Object entry,
                               boolean visible) {
        // An empty source is registered under ANY; the ID itself is still
        // built from the source exactly as passed in.
        String effectiveSource = (source.length() == 0) ? ANY : source;
        String ID = TransliteratorIDParser.STVtoID(source, target, variant);
        registerEntry(ID, effectiveSource, target, variant, entry, visible);
    }

    /**
     * Convenience overload taking only an ID.  Splits the ID into source,
     * target and variant, rebuilds the ID from those parts, and delegates
     * to the 6-arg registerEntry().
     */
    private void registerEntry(String ID,
                               Object entry,
                               boolean visible) {
        String[] parts = TransliteratorIDParser.IDtoSTV(ID);
        String source = parts[0];
        String target = parts[1];
        String variant = parts[2];
        // Rebuilding is only needed if ID.indexOf('-') < 0
        String rebuiltID = TransliteratorIDParser.STVtoID(source, target, variant);
        registerEntry(rebuiltID, source, target, variant, entry, visible);
    }

    /**
     * Stores an entry object under the given ID and then either publishes
     * or hides its source-target/variant: a visible entry is added to the
     * spec DAG and to the available IDs, an invisible one is removed from
     * both.
     */
    private void registerEntry(String ID,
                               String source,
                               String target,
                               String variant,
                               Object entry,
                               boolean visible) {
        CaseInsensitiveString key = new CaseInsensitiveString(ID);

        // The registry holds the entry inside an array so that it can be
        // modified later; an entry that already is an array is used as is.
        Object[] holder = (entry instanceof Object[])
                ? (Object[]) entry
                : new Object[] { entry };

        registry.put(key, holder);
        if (!visible) {
            removeSTV(source, target, variant);
            availableIDs.remove(key);
            return;
        }
        registerSTV(source, target, variant);
        availableIDs.add(key);
    }

    /**
     * Register a source-target/variant in the specDAG.  Variant may be
     * empty, but source and target must not be.  If variant is empty then
     * the special variant NO_VARIANT is stored in slot zero of the
     * UVector of variants.
     */
    private void registerSTV(String source,
                             String target,
                             String variant) {
        // assert(source.length() > 0);
        // assert(target.length() > 0);
        CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
        CaseInsensitiveString citrg = new CaseInsensitiveString(target);
        CaseInsensitiveString civar = new CaseInsensitiveString(variant);
        Map> targets = specDAG.get(cisrc);
        if (targets == null) {
            targets = Collections.synchronizedMap(new HashMap>());
            specDAG.put(cisrc, targets);
        }
        List variants = targets.get(citrg);
        if (variants == null) {
            variants = new ArrayList();
            targets.put(citrg, variants);
        }
        // assert(NO_VARIANT == "");
        // We add the variant string.  If it is the special "no variant"
        // string, that is, the empty string, we add it at position zero.
        if (!variants.contains(civar)) {
            if (variant.length() > 0) {
                variants.add(civar);
            } else {
                variants.add(0, civar);
            }
        }
    }

    /**
     * Remove a source-target/variant from the specDAG.  A variant list
     * that becomes empty is removed from its target map, and a target
     * map that becomes empty is removed from the specDAG.  Does nothing
     * if the source or the source-target pair is not present.
     */
    private void removeSTV(String source,
                           String target,
                           String variant) {
        // assert(source.length() > 0);
        // assert(target.length() > 0);
        CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
        CaseInsensitiveString citrg = new CaseInsensitiveString(target);
        CaseInsensitiveString civar = new CaseInsensitiveString(variant);
        Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
        if (targets == null) {
            return; // should never happen for valid s-t/v
        }
        List<CaseInsensitiveString> variants = targets.get(citrg);
        if (variants == null) {
            return; // should never happen for valid s-t/v
        }
        variants.remove(civar);
        if (variants.isEmpty()) {
            targets.remove(citrg); // drops the now-empty variant list
            if (targets.isEmpty()) {
                specDAG.remove(cisrc); // drops the now-empty target map
            }
        }
    }

    // Switch for the System.out tracing in findInDynamicStore() and
    // findInStaticStore(); off by default.
    private static final boolean DEBUG = false;

    /**
     * Looks up a source-target/variant in the dynamic registry store,
     * using the specs at their current level of iteration.  Returns
     * null if the registry has no entry for the resulting ID.
     */
    private Object[] findInDynamicStore(Spec src,
                                      Spec trg,
                                      String variant) {
        String ID = TransliteratorIDParser.STVtoID(src.get(), trg.get(), variant);
        ///CLOVER:OFF
        if (DEBUG) {
            System.out.println("TransliteratorRegistry.findInDynamicStore:" +
                               ID);
        }
        ///CLOVER:ON
        CaseInsensitiveString key = new CaseInsensitiveString(ID);
        return registry.get(key);
    }

    /**
     * Looks up a source-target/variant in the static locale resource
     * store without performing fallback.  If the source is a locale its
     * bundle is searched in the forward direction; otherwise, if the
     * target is a locale, its bundle is searched in the reverse
     * direction.  Returns null on failure.
     *
     * A found entry is registered in the dynamic store under the top
     * specs for next time, but as invisible: a mere request does not
     * expand the available ID list or the spec DAG.
     */
    private Object[] findInStaticStore(Spec src,
                                     Spec trg,
                                     String variant) {
        ///CLOVER:OFF
        if (DEBUG) {
            String ID = TransliteratorIDParser.STVtoID(src.get(), trg.get(), variant);
            System.out.println("TransliteratorRegistry.findInStaticStore:" +
                               ID);
        }
        ///CLOVER:ON
        final Object[] entry;
        if (src.isLocale()) {
            entry = findInBundle(src, trg, variant, Transliterator.FORWARD);
        } else if (trg.isLocale()) {
            entry = findInBundle(trg, src, variant, Transliterator.REVERSE);
        } else {
            entry = null;
        }

        if (entry == null) {
            return null;
        }

        // Remember the entry in the dynamic store, hidden from the public lists.
        registerEntry(src.getTop(), trg.getTop(), variant, entry, false);
        return entry;
    }

    /**
     * Attempt to find an entry in a single resource bundle.  This is
     * a one-sided lookup.  findInStaticStore() performs up to two such
     * lookups, one for the source, and one for the target.
     *
     * Do not perform fallback.  Return 0 on failure.
     *
     * On success, create a new Entry object, populate it, and return it.
     * The caller owns the returned object.
     */
    private Object[] findInBundle(Spec specToOpen,
                                  Spec specToFind,
                                  String variant,
                                  int direction) {
        // assert(specToOpen.isLocale());
        ResourceBundle res = specToOpen.getBundle();

        if (res == null) {
            // This means that the bundle's locale does not match
            // the current level of iteration for the spec.
            return null;
        }

        for (int pass=0; pass<2; ++pass) {
            StringBuilder tag = new StringBuilder();
            // First try either TransliterateTo_xxx or
            // TransliterateFrom_xxx, then try the bidirectional
            // Transliterate_xxx.  This precedence order is arbitrary
            // but must be consistent and documented.
            if (pass == 0) {
                tag.append(direction == Transliterator.FORWARD ?
                           "TransliterateTo" : "TransliterateFrom");
            } else {
                tag.append("Transliterate");
            }
            tag.append(specToFind.get().toUpperCase(Locale.ENGLISH));

            try {
                // The Transliterate*_xxx resource is an array of
                // strings of the format { <v0>, <r0>, ... }.  Each
                // <vi> is a variant name, and each <ri> is a rule.
                String[] subres = res.getStringArray(tag.toString());

                // assert(subres != null);
                // assert(subres.length % 2 == 0);
                int i = 0;
                if (variant.length() != 0) {
                    for (i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy