com.ibm.icu.text.TransliteratorRegistry Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of virtdata-lib-realer Show documentation
Show all versions of virtdata-lib-realer Show documentation
With inspiration from other libraries
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
**********************************************************************
* Copyright (c) 2001-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 08/19/2001 aliu Creation.
**********************************************************************
*/
package com.ibm.icu.text;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.RuleBasedTransliterator.Data;
import com.ibm.icu.util.CaseInsensitiveString;
import com.ibm.icu.util.UResourceBundle;
class TransliteratorRegistry {
// char constants
// Separator between the parts of a locale-style spec (e.g. xx_YY_ZZZ);
// Spec.setupNext() truncates at the last occurrence to build the fallback chain.
private static final char LOCALE_SEP = '_';
// String constants
// Marker for "no variant": the empty variant string.
private static final String NO_VARIANT = ""; // empty string
// Source name substituted by the 5-arg registerEntry() when the source is empty.
private static final String ANY = "Any";
/**
 * Dynamic registry mapping full IDs to entries. This contains both
 * public and internal entities. The visibility is controlled by
 * whether an entry is listed in availableIDs and specDAG or not.
 *
 * Keys are CaseInsensitiveString objects.
 * Values are Object[] holders (see the 6-arg registerEntry()) that wrap
 * the actual entry: a Class (subclass of Transliterator),
 * RuleBasedTransliterator.Data, Transliterator.Factory, or one of the
 * entry classes defined here (such as AliasEntry or ResourceEntry).
 * The entry is wrapped in an array so that it can be modified later.
 */
private Map<CaseInsensitiveString, Object[]> registry;
/**
* DAG of visible IDs by spec. Hashtable: source => (Hashtable:
* target => (Vector: variant)) The Vector of variants is never
* empty. For a source-target with no variant, the special
* variant NO_VARIANT (the empty string) is stored in slot zero of
* the UVector.
*
* Keys are CaseInsensitiveString objects.
* Values are Hashtable of (CaseInsensitiveString -> Vector of
* CaseInsensitiveString)
*/
private Map>> specDAG;
/**
 * List of public (visible) full IDs, stored as CaseInsensitiveString
 * objects.
 */
private List<CaseInsensitiveString> availableIDs;
//----------------------------------------------------------------------
// class Spec
//----------------------------------------------------------------------
/**
 * A Spec is a string naming either a source or a target. (A spec could
 * in principle name a variant as well, but this class is only used for
 * sources and targets.)
 *
 * A Spec is either a locale or a script. A locale spec carries a
 * fallback chain xx_YY_ZZZ -> xx_YY -> xx -> ssss, where ssss is the
 * script mapping of xx_YY_ZZZ. hasFallback(), next(), and reset()
 * walk that chain.
 *
 * The constructor canonicalizes the input: it resolves the script name
 * for the spec and, when the spec is not itself a script name, attempts
 * to open the matching transliteration resource bundle.
 */
static class Spec {
    private String top; // the spec exactly as supplied to the constructor
    private String spec; // spec at the current step of the iteration
    private String nextSpec; // spec at the following step; null when there is none
    private String scriptName; // script name equivalent of top, or null if same as top / unknown
    private boolean isSpecLocale; // true if spec is a locale
    private boolean isNextLocale; // true if nextSpec is a locale
    private ICUResourceBundle res; // bundle opened for top, or null

    public Spec(String theSpec) {
        top = theSpec;
        spec = null;
        scriptName = null;
        try {
            // A value other than UScript.INVALID_CODE means top is a script name.
            final int scriptCode = UScript.getCodeFromName(top);

            // Canonicalize the script name -or- map a locale to its script.
            final int[] codes = UScript.getCode(top);
            if (codes != null) {
                final String canonical = UScript.getName(codes[0]);
                // A script name identical to top carries no extra information.
                scriptName = canonical.equalsIgnoreCase(top) ? null : canonical;
            }

            isSpecLocale = false;
            res = null;

            // Only attempt a locale lookup when top is not a script name.
            if (scriptCode == UScript.INVALID_CODE) {
                final Locale locale = LocaleUtility.getLocaleFromName(top);
                res = (ICUResourceBundle) UResourceBundle.getBundleInstance(
                        ICUData.ICU_TRANSLIT_BASE_NAME, locale);
                // Flag as a locale only if the bundle we got is the one we asked for.
                isSpecLocale = res != null
                        && LocaleUtility.isFallbackOf(res.getULocale().toString(), top);
            }
        } catch (MissingResourceException e) {
            ///CLOVER:OFF
            // The constructor is called from multiple private methods
            // that protects an invalid scriptName
            scriptName = null;
            ///CLOVER:ON
        }
        // assert(spec != top);
        reset();
    }

    /** Returns true if there is a further spec after the current one. */
    public boolean hasFallback() {
        return nextSpec != null;
    }

    /** Rewinds the iteration to the top spec, unless it is already there. */
    public void reset() {
        if (Utility.sameObjects(spec, top)) {
            return;
        }
        spec = top;
        isSpecLocale = (res != null);
        setupNext();
    }

    /** Computes nextSpec and isNextLocale from the current spec. */
    private void setupNext() {
        isNextLocale = false;
        if (!isSpecLocale) {
            // Fall back to the script (which may be null); if we are
            // already pointing at it, the chain ends.
            nextSpec = Utility.sameObjects(nextSpec, scriptName) ? null : scriptName;
            return;
        }
        final String current = spec;
        nextSpec = current;
        final int sep = current.lastIndexOf(LOCALE_SEP);
        if (sep > 0) {
            // Drop the last locale component.
            nextSpec = current.substring(0, sep);
            isNextLocale = true;
        } else {
            // No separator, or a leading one (_FOO): go to the script
            // name, which may be null.
            nextSpec = scriptName;
        }
    }

    // Protocol:
    // for(String& s(spec.get());
    // spec.hasFallback(); s(spec.next())) { ...
    /** Advances to the next spec in the chain and returns it. */
    public String next() {
        spec = nextSpec;
        isSpecLocale = isNextLocale;
        setupNext();
        return spec;
    }

    /** Returns the spec at the current step of the iteration. */
    public String get() {
        return spec;
    }

    /** Returns true if the current spec is flagged as a locale. */
    public boolean isLocale() {
        return isSpecLocale;
    }

    /**
     * Returns the resource bundle for this spec if its locale string
     * equals the spec at the current step of the iteration (which
     * runs aa_BB_CCC, aa_BB, aa); otherwise returns null. Also
     * returns null when no bundle was opened.
     */
    public ResourceBundle getBundle() {
        if (res == null) {
            return null;
        }
        return res.getULocale().toString().equals(spec) ? res : null;
    }

    /** Returns the spec as originally supplied. */
    public String getTop() {
        return top;
    }
}
//----------------------------------------------------------------------
// Entry classes
//----------------------------------------------------------------------
/** Registry entry that pairs a resource name with a direction. */
static class ResourceEntry {
    public String resource;
    public int direction;

    public ResourceEntry(String resourceName, int dir) {
        this.resource = resourceName;
        this.direction = dir;
    }
}
/** Registry entry for a rule taken from a locale resource bundle, plus its direction. */
static class LocaleEntry {
    public String rule;
    public int direction;

    public LocaleEntry(String ruleText, int dir) {
        this.rule = ruleText;
        this.direction = dir;
    }
}
/** Registry entry that holds the alias ID another ID was registered with. */
static class AliasEntry {
    public String alias;

    public AliasEntry(String aliasID) {
        this.alias = aliasID;
    }
}
/**
 * Registry entry describing a compound transliterator as two parallel
 * lists: ID blocks (strings) and rule data. getInstance() interleaves
 * them into a single CompoundTransliterator.
 */
static class CompoundRBTEntry {
    private String ID;
    private List<String> idBlockVector;
    private List<Data> dataVector;
    private UnicodeSet compoundFilter;

    /**
     * @param theID ID assigned to every transliterator built from this entry
     * @param theIDBlockVector ID blocks; empty strings are skipped
     * @param theDataVector rule data, one rule-based pass per element
     * @param theCompoundFilter filter set on the result, or null for none
     */
    public CompoundRBTEntry(String theID, List<String> theIDBlockVector,
            List<Data> theDataVector,
            UnicodeSet theCompoundFilter) {
        ID = theID;
        idBlockVector = theIDBlockVector;
        dataVector = theDataVector;
        compoundFilter = theCompoundFilter;
    }

    /**
     * Builds a new compound transliterator. For each index i, the
     * i-th ID block (if present and non-empty) is instantiated first,
     * followed by a RuleBasedTransliterator for the i-th data element
     * (if present), named "%Pass" plus a 1-based pass counter.
     */
    public Transliterator getInstance() {
        List<Transliterator> transliterators = new ArrayList<Transliterator>();
        int passNumber = 1;
        int limit = Math.max(idBlockVector.size(), dataVector.size());
        for (int i = 0; i < limit; i++) {
            if (i < idBlockVector.size()) {
                String idBlock = idBlockVector.get(i);
                if (idBlock.length() > 0) {
                    transliterators.add(Transliterator.getInstance(idBlock));
                }
            }
            if (i < dataVector.size()) {
                Data data = dataVector.get(i);
                transliterators.add(new RuleBasedTransliterator("%Pass" + passNumber++, data, null));
            }
        }
        // passNumber - 1 is the number of rule-based passes created above.
        Transliterator t = new CompoundTransliterator(transliterators, passNumber - 1);
        t.setID(ID);
        if (compoundFilter != null) {
            t.setFilter(compoundFilter);
        }
        return t;
    }
}
//----------------------------------------------------------------------
// class TransliteratorRegistry: Basic public API
//----------------------------------------------------------------------
/**
 * Creates an empty registry. The registry map and the spec DAG are
 * wrapped with Collections.synchronizedMap(); the list of available
 * IDs is a plain ArrayList.
 */
public TransliteratorRegistry() {
    registry = Collections.synchronizedMap(
            new HashMap<CaseInsensitiveString, Object[]>());
    specDAG = Collections.synchronizedMap(
            new HashMap<CaseInsensitiveString, Map<CaseInsensitiveString, List<CaseInsensitiveString>>>());
    availableIDs = new ArrayList<CaseInsensitiveString>();
}
/**
 * Attempts to instantiate a simple ID (forward direction, no inline
 * filter, not compound) from the registry.
 *
 * If the ID points to an alias, a non-empty value is returned in
 * aliasReturn instead: the alias may contain filters or compounds,
 * which this class does not understand, so the caller must resolve it.
 * The caller should make aliasReturn empty before calling.
 *
 * @return the transliterator, or null if no entry is found for the ID
 */
public Transliterator get(String ID,
        StringBuffer aliasReturn) {
    final Object[] found = find(ID);
    if (found == null) {
        return null;
    }
    return instantiateEntry(ID, found, aliasReturn);
}
/**
* Register a class. This adds an entry to the
* dynamic store, or replaces an existing entry. Any entry in the
* underlying static locale resource store is masked.
*/
public void put(String ID,
Class extends Transliterator> transliteratorSubclass,
boolean visible) {
registerEntry(ID, transliteratorSubclass, visible);
}
/**
 * Registers a factory under the given ID. The entry is added to the
 * dynamic store, replacing any entry already there, and masks any
 * entry in the underlying static locale resource store.
 *
 * @param ID the ID to register under
 * @param factory the factory to store as the entry
 * @param visible whether the ID is listed among the available IDs and specs
 */
public void put(String ID,
        Transliterator.Factory factory,
        boolean visible) {
    final Transliterator.Factory entry = factory;
    registerEntry(ID, entry, visible);
}
/**
 * Registers a resource name and direction under the given ID. The
 * entry is added to the dynamic store, replacing any entry already
 * there, and masks any entry in the underlying static locale resource
 * store.
 *
 * @param ID the ID to register under
 * @param resourceName name of the resource, stored in a ResourceEntry
 * @param dir direction stored alongside the resource name
 * @param visible whether the ID is listed among the available IDs and specs
 */
public void put(String ID,
        String resourceName,
        int dir,
        boolean visible) {
    final ResourceEntry entry = new ResourceEntry(resourceName, dir);
    registerEntry(ID, entry, visible);
}
/**
 * Registers an alias ID under the given ID. The entry is added to the
 * dynamic store, replacing any entry already there, and masks any
 * entry in the underlying static locale resource store.
 *
 * @param ID the ID to register under
 * @param alias the alias ID, stored in an AliasEntry
 * @param visible whether the ID is listed among the available IDs and specs
 */
public void put(String ID,
        String alias,
        boolean visible) {
    final AliasEntry entry = new AliasEntry(alias);
    registerEntry(ID, entry, visible);
}
/**
 * Registers a Transliterator object under the given ID. The entry is
 * added to the dynamic store, replacing any entry already there, and
 * masks any entry in the underlying static locale resource store.
 *
 * @param ID the ID to register under
 * @param trans the transliterator to store as the entry
 * @param visible whether the ID is listed among the available IDs and specs
 */
public void put(String ID,
        Transliterator trans,
        boolean visible) {
    final Transliterator entry = trans;
    registerEntry(ID, entry, visible);
}
/**
 * Unregisters an ID: drops its entry from the dynamic store (if any),
 * its source-target/variant from the spec DAG, and its ID from the
 * list of available IDs. The static locale resource store is
 * unaffected.
 */
public void remove(String ID) {
    final String[] stv = TransliteratorIDParser.IDtoSTV(ID);
    final String source = stv[0];
    final String target = stv[1];
    final String variant = stv[2];
    // Only need to do this if ID.indexOf('-') < 0
    final String id = TransliteratorIDParser.STVtoID(source, target, variant);
    registry.remove(new CaseInsensitiveString(id));
    removeSTV(source, target, variant);
    availableIDs.remove(new CaseInsensitiveString(id));
}
//----------------------------------------------------------------------
// class TransliteratorRegistry: Public ID and spec management
//----------------------------------------------------------------------
/**
 * An internal class that adapts an enumeration over
 * CaseInsensitiveStrings to an enumeration over Strings. A null
 * wrapped enumeration is treated as empty.
 */
private static class IDEnumeration implements Enumeration<String> {
    Enumeration<CaseInsensitiveString> en;

    /**
     * @param e the enumeration to adapt, or null for an empty enumeration
     */
    public IDEnumeration(Enumeration<CaseInsensitiveString> e) {
        en = e;
    }

    @Override
    public boolean hasMoreElements() {
        return en != null && en.hasMoreElements();
    }

    /**
     * @throws java.util.NoSuchElementException if there are no more elements
     */
    @Override
    public String nextElement() {
        if (en == null) {
            // Honor the Enumeration contract instead of failing with a
            // NullPointerException on the empty (null-backed) case.
            throw new java.util.NoSuchElementException();
        }
        return en.nextElement().getString();
    }
}
/**
 * Returns an enumeration over the programmatic names of visible
 * registered transliterators.
 *
 * @return An {@code Enumeration} over {@code String} objects
 */
public Enumeration<String> getAvailableIDs() {
    // Since the cache contains CaseInsensitiveString objects, but
    // the caller expects Strings, we have to use an intermediary.
    return new IDEnumeration(Collections.enumeration(availableIDs));
}
/**
 * Returns an enumeration over all visible source names, i.e. the
 * keys of the spec DAG.
 *
 * @return An {@code Enumeration} over {@code String} objects
 */
public Enumeration<String> getAvailableSources() {
    return new IDEnumeration(Collections.enumeration(specDAG.keySet()));
}
/**
 * Returns an enumeration over visible target names for the given
 * source. The enumeration is empty if the source is not in the
 * spec DAG.
 *
 * @return An {@code Enumeration} over {@code String} objects
 */
public Enumeration<String> getAvailableTargets(String source) {
    CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
    Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
    if (targets == null) {
        return new IDEnumeration(null);
    }
    return new IDEnumeration(Collections.enumeration(targets.keySet()));
}
/**
 * Returns an enumeration over visible variant names for the given
 * source and target. The enumeration is empty if the source, or the
 * target under that source, is not in the spec DAG.
 *
 * @return An {@code Enumeration} over {@code String} objects
 */
public Enumeration<String> getAvailableVariants(String source, String target) {
    CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
    CaseInsensitiveString citrg = new CaseInsensitiveString(target);
    Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
    if (targets == null) {
        return new IDEnumeration(null);
    }
    List<CaseInsensitiveString> variants = targets.get(citrg);
    if (variants == null) {
        return new IDEnumeration(null);
    }
    return new IDEnumeration(Collections.enumeration(variants));
}
//----------------------------------------------------------------------
// class TransliteratorRegistry: internal
//----------------------------------------------------------------------
/**
 * Convenience overload taking separate source, target and variant.
 * Builds the ID from the three parts as given and delegates to the
 * 6-arg registerEntry(), passing ANY as the source when the given
 * source is empty.
 */
private void registerEntry(String source,
        String target,
        String variant,
        Object entry,
        boolean visible) {
    final String effectiveSource = (source.length() == 0) ? ANY : source;
    // The ID is built from the source as supplied, not the substituted one.
    final String ID = TransliteratorIDParser.STVtoID(source, target, variant);
    registerEntry(ID, effectiveSource, target, variant, entry, visible);
}
/**
 * Convenience overload taking a full ID. Splits the ID into source,
 * target and variant, rebuilds the ID from those parts, and delegates
 * to the 6-arg registerEntry().
 */
private void registerEntry(String ID,
        Object entry,
        boolean visible) {
    final String[] stv = TransliteratorIDParser.IDtoSTV(ID);
    final String source = stv[0];
    final String target = stv[1];
    final String variant = stv[2];
    // Only need to do this if ID.indexOf('-') < 0
    final String id = TransliteratorIDParser.STVtoID(source, target, variant);
    registerEntry(id, source, target, variant, entry, visible);
}
/**
 * Registers an entry object under the given ID, and adds or removes
 * its source-target/variant in the spec DAG and its ID in the list of
 * available IDs according to the visible flag.
 */
private void registerEntry(String ID,
        String source,
        String target,
        String variant,
        Object entry,
        boolean visible) {
    final CaseInsensitiveString key = new CaseInsensitiveString(ID);

    // The entry lives inside an array so it can be modified later;
    // an entry that already is an Object[] is stored as-is.
    final Object[] holder = (entry instanceof Object[])
            ? (Object[]) entry
            : new Object[] { entry };
    registry.put(key, holder);

    if (!visible) {
        removeSTV(source, target, variant);
        availableIDs.remove(key);
        return;
    }

    registerSTV(source, target, variant);
    if (!availableIDs.contains(key)) {
        availableIDs.add(key);
    }
}
/**
* Register a source-target/variant in the specDAG. Variant may be
* empty, but source and target must not be. If variant is empty then
* the special variant NO_VARIANT is stored in slot zero of the
* UVector of variants.
*/
private void registerSTV(String source,
String target,
String variant) {
// assert(source.length() > 0);
// assert(target.length() > 0);
CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
CaseInsensitiveString citrg = new CaseInsensitiveString(target);
CaseInsensitiveString civar = new CaseInsensitiveString(variant);
Map> targets = specDAG.get(cisrc);
if (targets == null) {
targets = Collections.synchronizedMap(new HashMap>());
specDAG.put(cisrc, targets);
}
List variants = targets.get(citrg);
if (variants == null) {
variants = new ArrayList();
targets.put(citrg, variants);
}
// assert(NO_VARIANT == "");
// We add the variant string. If it is the special "no variant"
// string, that is, the empty string, we add it at position zero.
if (!variants.contains(civar)) {
if (variant.length() > 0) {
variants.add(civar);
} else {
variants.add(0, civar);
}
}
}
/**
 * Remove a source-target/variant from the specDAG. When the last
 * variant of a source-target is removed, the target is removed too;
 * when the last target of a source is removed, so is the source.
 * Does nothing if the source or target is not present.
 */
private void removeSTV(String source,
        String target,
        String variant) {
    // assert(source.length() > 0);
    // assert(target.length() > 0);
    CaseInsensitiveString cisrc = new CaseInsensitiveString(source);
    CaseInsensitiveString citrg = new CaseInsensitiveString(target);
    CaseInsensitiveString civar = new CaseInsensitiveString(variant);
    Map<CaseInsensitiveString, List<CaseInsensitiveString>> targets = specDAG.get(cisrc);
    if (targets == null) {
        return; // should never happen for valid s-t/v
    }
    List<CaseInsensitiveString> variants = targets.get(citrg);
    if (variants == null) {
        return; // should never happen for valid s-t/v
    }
    variants.remove(civar);
    if (variants.size() == 0) {
        // Prune the now-empty branches so the variant list is never empty.
        targets.remove(citrg);
        if (targets.size() == 0) {
            specDAG.remove(cisrc);
        }
    }
}
// When true, findInDynamicStore() and findInStaticStore() print the ID
// being looked up to System.out.
private static final boolean DEBUG = false;
/**
 * Looks up a source-target/variant in the dynamic registry store,
 * using the current specs of src and trg to build the ID.
 *
 * @return the stored entry holder, or null if there is none
 */
private Object[] findInDynamicStore(Spec src,
        Spec trg,
        String variant) {
    final String ID = TransliteratorIDParser.STVtoID(src.get(), trg.get(), variant);
    ///CLOVER:OFF
    if (DEBUG) {
        System.out.println("TransliteratorRegistry.findInDynamicStore:" +
                ID);
    }
    ///CLOVER:ON
    final CaseInsensitiveString key = new CaseInsensitiveString(ID);
    return registry.get(key);
}
/**
 * Looks up a source-target/variant in the static locale resource
 * store, without fallback. The source bundle is tried (forward) when
 * the source is a locale; otherwise the target bundle is tried
 * (reverse) when the target is a locale.
 *
 * A found entry is registered in the dynamic store under the top
 * specs of src and trg, but not made visible: a mere request does not
 * expand the available ID list or the spec DAG.
 *
 * @return the entry, or null if nothing was found
 */
private Object[] findInStaticStore(Spec src,
        Spec trg,
        String variant) {
    ///CLOVER:OFF
    if (DEBUG) {
        String ID = TransliteratorIDParser.STVtoID(src.get(), trg.get(), variant);
        System.out.println("TransliteratorRegistry.findInStaticStore:" +
                ID);
    }
    ///CLOVER:ON
    final Object[] entry;
    if (src.isLocale()) {
        entry = findInBundle(src, trg, variant, Transliterator.FORWARD);
    } else if (trg.isLocale()) {
        entry = findInBundle(trg, src, variant, Transliterator.REVERSE);
    } else {
        // Neither side is a locale, so there is no bundle to consult.
        return null;
    }
    // Remember a successful lookup in the dynamic store for next time.
    if (entry != null) {
        registerEntry(src.getTop(), trg.getTop(), variant, entry, false);
    }
    return entry;
}
/**
* Attempt to find an entry in a single resource bundle. This is
* a one-sided lookup. findInStaticStore() performs up to two such
* lookups, one for the source, and one for the target.
*
* Do not perform fallback. Return 0 on failure.
*
* On success, create a new Entry object, populate it, and return it.
* The caller owns the returned object.
*/
private Object[] findInBundle(Spec specToOpen,
Spec specToFind,
String variant,
int direction) {
// assert(specToOpen.isLocale());
ResourceBundle res = specToOpen.getBundle();
if (res == null) {
// This means that the bundle's locale does not match
// the current level of iteration for the spec.
return null;
}
for (int pass=0; pass<2; ++pass) {
StringBuilder tag = new StringBuilder();
// First try either TransliteratorTo_xxx or
// TransliterateFrom_xxx, then try the bidirectional
// Transliterate_xxx. This precedence order is arbitrary
// but must be consistent and documented.
if (pass == 0) {
tag.append(direction == Transliterator.FORWARD ?
"TransliterateTo" : "TransliterateFrom");
} else {
tag.append("Transliterate");
}
tag.append(specToFind.get().toUpperCase(Locale.ENGLISH));
try {
// The Transliterate*_xxx resource is an array of
// strings of the format { , , ... }. Each
// is a variant name, and each is a rule.
String[] subres = res.getStringArray(tag.toString());
// assert(subres != null);
// assert(subres.length % 2 == 0);
int i = 0;
if (variant.length() != 0) {
for (i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy