com.ibm.icu.text.SourceTargetUtility Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2011, Google, International Business Machines *
* Corporation and others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.util.HashSet;
import java.util.Set;
import com.ibm.icu.lang.CharSequences;
/**
* Simple internal utility class for helping with getSource/TargetSet
*/
class SourceTargetUtility {
final Transform transform;
final UnicodeSet sourceCache;
final Set sourceStrings;
static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
static Normalizer2 NFC = Normalizer2.getNFCInstance();
//static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();
public SourceTargetUtility(Transform transform) {
this(transform, null);
}
public SourceTargetUtility(Transform transform, Normalizer2 normalizer) {
this.transform = transform;
if (normalizer != null) {
// synchronized (SourceTargetUtility.class) {
// if (NFC == null) {
// NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
// for (int i = 0; i <= 0x10FFFF; ++i) {
// String d = NFC.getDecomposition(i);
// if (d == null) {
// continue;
// }
// String s = NFC.normalize(d);
// if (!CharSequences.equals(i, s)) {
// continue;
// }
// // composes
// boolean first = false;
// for (int trailing : CharSequences.codePoints(d)) {
// if (first) {
// first = false;
// } else {
// TRAILING_COMBINING.add(trailing);
// }
// }
// }
// }
// }
sourceCache = new UnicodeSet("[:^ccc=0:]");
} else {
sourceCache = new UnicodeSet();
}
sourceStrings = new HashSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
String s = transform.transform(UTF16.valueOf(i));
boolean added = false;
if (!CharSequences.equals(i, s)) {
sourceCache.add(i);
added = true;
}
if (normalizer == null) {
continue;
}
String d = NFC.getDecomposition(i);
if (d == null) {
continue;
}
s = transform.transform(d);
if (!d.equals(s)) {
sourceStrings.add(d);
}
if (added) {
continue;
}
if (!normalizer.isInert(i)) {
sourceCache.add(i);
continue;
}
// see if any of the non-starters change s; if so, add i
// for (String ns : TRAILING_COMBINING) {
// String s2 = transform.transform(s + ns);
// if (!s2.startsWith(s)) {
// sourceCache.add(i);
// break;
// }
// }
// int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2;
// if (endOfFirst >= d.length()) {
// continue;
// }
// // now add all initial substrings
// for (int j = 1; j < d.length(); ++j) {
// if (!CharSequences.onCharacterBoundary(d, j)) {
// continue;
// }
// String dd = d.substring(0,j);
// s = transform.transform(dd);
// if (!dd.equals(s)) {
// sourceStrings.add(dd);
// }
// }
}
sourceCache.freeze();
}
public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
UnicodeSet targetSet) {
UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
sourceSet.addAll(affectedCharacters);
for (String s : affectedCharacters) {
targetSet.addAll(transform.transform(s));
}
for (String s : sourceStrings) {
if (myFilter.containsAll(s)) {
String t = transform.transform(s);
if (!s.equals(t)) {
targetSet.addAll(t);
sourceSet.addAll(s);
}
}
}
}
}