// Source listing: com.ibm.icu.impl.UnicodeMap, from the icu4j artifact (Maven / Gradle / Ivy),
// newest published version.
// International Components for Unicode for Java (ICU4J) is a mature, widely used Java library
// providing Unicode and globalization support.
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1996-2016, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.text.StringTransform;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.Freezable;
/**
* Class for mapping Unicode characters and strings to values, optimized for single code points,
* where ranges of code points have the same value.
* Much smaller storage than using HashMap, and much faster and more compact than
* a list of UnicodeSets. The API design mimics Map but can't extend it due to some
* necessary changes (much as UnicodeSet mimics Set). Note that nulls are not permitted as values;
* that is, a put(x,null) is the same as remove(x).
* At this point "" is also not allowed as a key, although that may change.
* @author markdavis
*
* @internal CLDR
*/
public final class UnicodeMap implements Cloneable, Freezable>, StringTransform, Iterable {
/**
* For serialization
*/
//private static final long serialVersionUID = -6540936876295804105L;
static final boolean ASSERTIONS = false;
static final long GROWTH_PERCENT = 200; // 100 is no growth!
static final long GROWTH_GAP = 10; // extra bump!
private int length;
// two parallel arrays to save memory. Wish Java had structs.
private int[] transitions;
/* package private */ T[] values;
private LinkedHashSet availableValues = new LinkedHashSet();
private transient boolean staleAvailableValues;
private transient boolean errorOnReset;
private volatile transient boolean locked;
private int lastIndex;
private TreeMap stringMap;
{ clear(); }
public UnicodeMap() {
}
public UnicodeMap(UnicodeMap other) {
this.putAll(other);
}
public UnicodeMap clear() {
if (locked) {
throw new UnsupportedOperationException("Attempt to modify locked object");
}
length = 2;
transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0};
values = (T[]) new Object[10];
availableValues.clear();
staleAvailableValues = false;
errorOnReset = false;
lastIndex = 0;
stringMap = null;
return this;
}
/* Boilerplate */
public boolean equals(Object other) {
if (other == null) return false;
try {
UnicodeMap that = (UnicodeMap) other;
if (length != that.length) return false;
for (int i = 0; i < length-1; ++i) {
if (transitions[i] != that.transitions[i]) return false;
if (!areEqual(values[i], that.values[i])) return false;
}
return true;
} catch (ClassCastException e) {
return false;
}
}
public static boolean areEqual(Object a , Object b) {
if (a == b) return true;
if (a == null || b == null) return false;
return a.equals(b);
}
public int hashCode() {
int result = length;
// TODO might want to abbreviate this for speed.
for (int i = 0; i < length-1; ++i) {
result = 37*result + transitions[i];
result = 37*result;
if (values[i] != null) {
result += values[i].hashCode();
}
}
if (stringMap != null) {
result = 37*result + stringMap.hashCode();
}
return result;
}
/**
* Standard clone. Warning, as with Collections, does not do deep clone.
*/
public UnicodeMap cloneAsThawed() {
UnicodeMap that = new UnicodeMap();
that.length = length;
that.transitions = (int[]) transitions.clone();
that.values = (T[]) values.clone();
that.availableValues = new LinkedHashSet(availableValues);
that.locked = false;
that.stringMap = stringMap == null ? null : (TreeMap) stringMap.clone();
return that;
}
/* for internal consistency checking */
void _checkInvariants() {
if (length < 2
|| length > transitions.length
|| transitions.length != values.length) {
throw new IllegalArgumentException("Invariant failed: Lengths bad");
}
for (int i = 1; i < length-1; ++i) {
if (areEqual(values[i-1], values[i])) {
throw new IllegalArgumentException("Invariant failed: values shared at "
+ "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
+ "\t" + Utility.hex(i) + ": <" + values[i] + ">"
);
}
}
if (transitions[0] != 0 || transitions[length-1] != 0x110000) {
throw new IllegalArgumentException("Invariant failed: bounds set wrong");
}
for (int i = 1; i < length-1; ++i) {
if (transitions[i-1] >= transitions[i]) {
throw new IllegalArgumentException("Invariant failed: not monotonic"
+ "\t" + Utility.hex(i-1) + ": " + transitions[i-1]
+ "\t" + Utility.hex(i) + ": " + transitions[i]
);
}
}
}
/**
* Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
* Assumes that 0 <= codepoint <= 0x10FFFF
* @param codepoint
* @return the index
*/
private int _findIndex(int c) {
int lo = 0;
int hi = length - 1;
int i = (lo + hi) >>> 1;
// invariant: c >= list[lo]
// invariant: c < list[hi]
while (i != lo) {
if (c < transitions[i]) {
hi = i;
} else {
lo = i;
}
i = (lo + hi) >>> 1;
}
if (ASSERTIONS) _checkFind(c, lo);
return lo;
}
private void _checkFind(int codepoint, int value) {
int other = __findIndex(codepoint);
if (other != value) {
throw new IllegalArgumentException("Invariant failed: binary search"
+ "\t" + Utility.hex(codepoint) + ": " + value
+ "\tshould be: " + other);
}
}
private int __findIndex(int codepoint) {
for (int i = length-1; i > 0; --i) {
if (transitions[i] <= codepoint) return i;
}
return 0;
}
/*
* Try indexed lookup
static final int SHIFT = 8;
int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found
boolean startsValid = false;
private int findIndex(int codepoint) {
if (!startsValid) {
int start = 0;
for (int i = 1; i < length; ++i) {
}
}
for (int i = length-1; i > 0; --i) {
if (transitions[i] <= codepoint) return i;
}
return 0;
}
*/
/**
* Remove the items from index through index+count-1.
* Logically reduces the size of the internal arrays.
* @param index
* @param count
*/
private void _removeAt(int index, int count) {
for (int i = index + count; i < length; ++i) {
transitions[i-count] = transitions[i];
values[i-count] = values[i];
}
length -= count;
}
/**
* Add a gap from index to index+count-1.
* The values there are undefined, and must be set.
* Logically grows arrays to accommodate. Actual growth is limited
* @param index
* @param count
*/
private void _insertGapAt(int index, int count) {
int newLength = length + count;
int[] oldtransitions = transitions;
T[] oldvalues = values;
if (newLength > transitions.length) {
int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100);
transitions = new int[allocation];
values = (T[]) new Object[allocation];
for (int i = 0; i < index; ++i) {
transitions[i] = oldtransitions[i];
values[i] = oldvalues[i];
}
}
for (int i = length - 1; i >= index; --i) {
transitions[i+count] = oldtransitions[i];
values[i+count] = oldvalues[i];
}
length = newLength;
}
/**
* Associates code point with value. Removes any previous association.
* All code that calls this MUST check for frozen first!
* @param codepoint
* @param value
* @return this, for chaining
*/
private UnicodeMap _put(int codepoint, T value) {
// Warning: baseIndex is an invariant; must
// be defined such that transitions[baseIndex] < codepoint
// at end of this routine.
int baseIndex;
if (transitions[lastIndex] <= codepoint
&& codepoint < transitions[lastIndex+1]) {
baseIndex = lastIndex;
} else {
baseIndex = _findIndex(codepoint);
}
int limitIndex = baseIndex + 1;
// cases are (a) value is already set
if (areEqual(values[baseIndex], value)) return this;
if (locked) {
throw new UnsupportedOperationException("Attempt to modify locked object");
}
if (errorOnReset && values[baseIndex] != null) {
throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
+ " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
}
// adjust the available values
staleAvailableValues = true;
availableValues.add(value); // add if not there already
int baseCP = transitions[baseIndex];
int limitCP = transitions[limitIndex];
// we now start walking through the difference case,
// based on whether we are at the start or end of range
// and whether the range is a single character or multiple
if (baseCP == codepoint) {
// CASE: At very start of range
boolean connectsWithPrevious =
baseIndex != 0 && areEqual(value, values[baseIndex-1]);
if (limitCP == codepoint + 1) {
// CASE: Single codepoint range
boolean connectsWithFollowing =
baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
if (connectsWithPrevious) {
// A1a connects with previous & following, so remove index
if (connectsWithFollowing) {
_removeAt(baseIndex, 2);
} else {
_removeAt(baseIndex, 1); // extend previous
}
--baseIndex; // fix up
} else if (connectsWithFollowing) {
_removeAt(baseIndex, 1); // extend following backwards
transitions[baseIndex] = codepoint;
} else {
// doesn't connect on either side, just reset
values[baseIndex] = value;
}
} else if (connectsWithPrevious) {
// A.1: start of multi codepoint range
// if connects
++transitions[baseIndex]; // extend previous
} else {
// otherwise insert new transition
transitions[baseIndex] = codepoint+1; // fix following range
_insertGapAt(baseIndex, 1);
values[baseIndex] = value;
transitions[baseIndex] = codepoint;
}
} else if (limitCP == codepoint + 1) {
// CASE: at end of range
// if connects, just back up range
boolean connectsWithFollowing =
baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
if (connectsWithFollowing) {
--transitions[limitIndex];
return this;
} else {
_insertGapAt(limitIndex, 1);
transitions[limitIndex] = codepoint;
values[limitIndex] = value;
}
} else {
// CASE: in middle of range
// insert gap, then set the new range
_insertGapAt(++baseIndex,2);
transitions[baseIndex] = codepoint;
values[baseIndex] = value;
transitions[baseIndex+1] = codepoint + 1;
values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
}
lastIndex = baseIndex; // store for next time
return this;
}
private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) {
// TODO optimize
for (int i = startCodePoint; i <= endCodePoint; ++i) {
_put(i, value);
if (ASSERTIONS) _checkInvariants();
}
return this;
}
/**
* Sets the codepoint value.
* @param codepoint
* @param value
* @return this (for chaining)
*/
public UnicodeMap put(int codepoint, T value) {
if (codepoint < 0 || codepoint > 0x10FFFF) {
throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
}
_put(codepoint, value);
if (ASSERTIONS) _checkInvariants();
return this;
}
/**
* Sets the codepoint value.
* @param codepoint
* @param value
* @return this (for chaining)
*/
public UnicodeMap put(String string, T value) {
int v = UnicodeSet.getSingleCodePoint(string);
if (v == Integer.MAX_VALUE) {
if (locked) {
throw new UnsupportedOperationException("Attempt to modify locked object");
}
if (value != null) {
if (stringMap == null) {
stringMap = new TreeMap();
}
stringMap.put(string, value);
staleAvailableValues = true;
} else if (stringMap != null) {
if (stringMap.remove(string) != null) {
staleAvailableValues = true;
}
}
return this;
}
return put(v, value);
}
/**
* Adds bunch o' codepoints; otherwise like put.
* @param codepoints
* @param value
* @return this (for chaining)
*/
public UnicodeMap putAll(UnicodeSet codepoints, T value) {
UnicodeSetIterator it = new UnicodeSetIterator(codepoints);
while (it.nextRange()) {
if (it.string == null) {
_putAll(it.codepoint, it.codepointEnd, value);
} else {
put(it.string, value);
}
}
return this;
}
/**
* Adds bunch o' codepoints; otherwise like add.
* @param startCodePoint
* @param endCodePoint
* @param value
* @return this (for chaining)
*/
public UnicodeMap putAll(int startCodePoint, int endCodePoint, T value) {
if (locked) {
throw new UnsupportedOperationException("Attempt to modify locked object");
}
if (startCodePoint < 0 || endCodePoint > 0x10FFFF) {
throw new IllegalArgumentException("Codepoint out of range: "
+ Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint));
}
return _putAll(startCodePoint, endCodePoint, value);
}
/**
* Add all the (main) values from a UnicodeMap
* @param unicodeMap the property to add to the map
* @return this (for chaining)
*/
public UnicodeMap putAll(UnicodeMap unicodeMap) {
for (int i = 0; i < unicodeMap.length; ++i) {
T value = unicodeMap.values[i];
if (value != null) {
_putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value);
}
if (ASSERTIONS) _checkInvariants();
}
if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) {
if (stringMap == null) {
stringMap = new TreeMap();
}
stringMap.putAll(unicodeMap.stringMap);
}
return this;
}
/**
* Add all the (main) values from a Unicode property
* @param prop the property to add to the map
* @return this (for chaining)
*/
public UnicodeMap putAllFiltered(UnicodeMap prop, UnicodeSet filter) {
// TODO optimize
for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) {
if (it.codepoint != UnicodeSetIterator.IS_STRING) {
T value = prop.getValue(it.codepoint);
if (value != null) {
_put(it.codepoint, value);
}
}
}
// now do the strings
for (String key : filter.strings()) {
T value = prop.get(key);
if (value != null) {
put(key, value);
}
}
return this;
}
/**
* Set the currently unmapped Unicode code points to the given value.
* @param value the value to set
* @return this (for chaining)
*/
public UnicodeMap setMissing(T value) {
// fast path, if value not yet present
if (!getAvailableValues().contains(value)) {
staleAvailableValues = true;
availableValues.add(value);
for (int i = 0; i < length; ++i) {
if (values[i] == null) values[i] = value;
}
return this;
} else {
return putAll(keySet(null), value);
}
}
/**
* Returns the keyset consisting of all the keys that would produce the given value. Deposits into
* result if it is not null. Remember to clear if you just want
* the new values.
*/
public UnicodeSet keySet(T value, UnicodeSet result) {
if (result == null) result = new UnicodeSet();
for (int i = 0; i < length - 1; ++i) {
if (areEqual(value, values[i])) {
result.add(transitions[i], transitions[i+1]-1);
}
}
if (value != null && stringMap != null) {
for (String key : stringMap.keySet()) {
T newValue = stringMap.get(key);
if (value.equals(newValue)) {
result.add((String)key);
}
}
}
return result;
}
/**
* Returns the keyset consisting of all the keys that would produce the given value.
* the new values.
*/
public UnicodeSet keySet(T value) {
return keySet(value,null);
}
/**
* Returns the keyset consisting of all the keys that would produce (non-null) values.
*/
public UnicodeSet keySet() {
UnicodeSet result = new UnicodeSet();
for (int i = 0; i < length - 1; ++i) {
if (values[i] != null) {
result.add(transitions[i], transitions[i+1]-1);
}
}
if (stringMap != null) {
result.addAll(stringMap.keySet());
}
return result;
}
/**
* Returns the list of possible values. Deposits each non-null value into
* result. Creates result if it is null. Remember to clear result if
* you are not appending to existing collection.
* @param result
* @return result
*/
public > U values(U result) {
if (staleAvailableValues) {
// collect all the current values
// retain them in the availableValues
Set temp = new HashSet();
for (int i = 0; i < length - 1; ++i) {
if (values[i] != null) temp.add(values[i]);
}
availableValues.retainAll(temp);
if (stringMap != null) {
availableValues.addAll(stringMap.values());
}
staleAvailableValues = false;
}
if (result == null) {
result = (U) new LinkedHashSet(availableValues.size());
}
result.addAll(availableValues);
return result;
}
/**
* Convenience method
*/
public Set values() {
return getAvailableValues(null);
}
/**
* Gets the value associated with a given code point.
* Returns null, if there is no such value.
* @param codepoint
* @return the value
*/
public T get(int codepoint) {
if (codepoint < 0 || codepoint > 0x10FFFF) {
throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
}
return values[_findIndex(codepoint)];
}
/**
* Gets the value associated with a given code point.
* Returns null, if there is no such value.
* @param codepoint
* @return the value
*/
public T get(String value) {
if (UTF16.hasMoreCodePointsThan(value, 1)) {
if (stringMap == null) {
return null;
}
return stringMap.get(value);
}
return getValue(UTF16.charAt(value, 0));
}
/**
* Change a new string from the source string according to the mappings.
* For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString()
* TODO: extend to strings
* @param source
* @return
*/
public String transform(String source) {
StringBuffer result = new StringBuffer();
int cp;
for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(source, i);
T mResult = getValue(cp);
if (mResult != null) {
result.append(mResult);
} else {
UTF16.append(result, cp);
}
}
return result.toString();
}
/**
* Used to add complex values, where the value isn't replaced but in some sense composed
* @author markdavis
*/
public abstract static class Composer {
/**
* This will be called with either a string or a code point. The result is the new value for that item.
* If the codepoint is used, the string is null; if the string is used, the codepoint is -1.
* @param a
* @param b
*/
public abstract T compose(int codePoint, String string, T a, T b);
}
public UnicodeMap composeWith(UnicodeMap other, Composer composer) {
for (T value : other.getAvailableValues()) {
UnicodeSet set = other.keySet(value);
composeWith(set, value, composer);
}
return this;
}
public UnicodeMap composeWith(UnicodeSet set, T value, Composer composer) {
for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
int i = it.codepoint;
if (i == UnicodeSetIterator.IS_STRING) {
String s = it.string;
T v1 = getValue(s);
T v3 = composer.compose(-1, s, v1, value);
if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
put(s, v3);
}
} else {
T v1 = getValue(i);
T v3 = composer.compose(i, null, v1, value);
if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
put(i, v3);
}
}
}
return this;
}
public String toString() {
return toString(null);
}
public String toString(Comparator collected) {
StringBuffer result = new StringBuffer();
if (collected == null) {
for (int i = 0; i < length-1; ++i) {
T value = values[i];
if (value == null) continue;
int start = transitions[i];
int end = transitions[i+1]-1;
result.append(Utility.hex(start));
if (start != end) result.append("-").append(Utility.hex(end));
result.append("=").append(value.toString()).append("\n");
}
if (stringMap != null) {
for (String s : stringMap.keySet()) {
result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n");
}
}
} else {
Set set = values(new TreeSet(collected));
for (Iterator it = set.iterator(); it.hasNext();) {
T value = it.next();
UnicodeSet s = keySet(value);
result.append(value).append("=").append(s.toString()).append("\n");
}
}
return result.toString();
}
/**
* @return Returns the errorOnReset value.
*/
public boolean getErrorOnReset() {
return errorOnReset;
}
/**
* Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception.
* @param errorOnReset The errorOnReset to set.
*/
public UnicodeMap setErrorOnReset(boolean errorOnReset) {
this.errorOnReset = errorOnReset;
return this;
}
/* (non-Javadoc)
* @see com.ibm.icu.dev.test.util.Freezable#isFrozen()
*/
public boolean isFrozen() {
// TODO Auto-generated method stub
return locked;
}
/* (non-Javadoc)
* @see com.ibm.icu.dev.test.util.Freezable#lock()
*/
public UnicodeMap freeze() {
locked = true;
return this;
}
/**
* Utility to find the maximal common prefix of two strings.
* TODO: fix supplemental support
*/
static public int findCommonPrefix(String last, String s) {
int minLen = Math.min(last.length(), s.length());
for (int i = 0; i < minLen; ++i) {
if (last.charAt(i) != s.charAt(i)) return i;
}
return minLen;
}
/**
* Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings().
*/
public int getRangeCount() {
return length-1;
}
/**
* Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
*/
public int getRangeStart(int range) {
return transitions[range];
}
/**
* Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
*/
public int getRangeEnd(int range) {
return transitions[range+1] - 1;
}
/**
* Get the value for the range.
*/
public T getRangeValue(int range) {
return values[range];
}
/**
* Get the strings that are not in the ranges. Returns null if there are none.
* @return
*/
public Set getNonRangeStrings() {
if (stringMap == null || stringMap.isEmpty()) {
return null;
}
return Collections.unmodifiableSet(stringMap.keySet());
}
static final boolean DEBUG_WRITE = false;
/* (non-Javadoc)
* @see java.util.Map#containsKey(java.lang.Object)
*/
public boolean containsKey(String key) {
return getValue(key) != null;
}
/* (non-Javadoc)
* @see java.util.Map#containsKey(java.lang.Object)
*/
public boolean containsKey(int key) {
return getValue(key) != null;
}
/* (non-Javadoc)
* @see java.util.Map#containsValue(java.lang.Object)
*/
public boolean containsValue(T value) {
// TODO Optimize
return getAvailableValues().contains(value);
}
/* (non-Javadoc)
* @see java.util.Map#isEmpty()
*/
public boolean isEmpty() {
return size() == 0;
}
/* (non-Javadoc)
* @see java.util.Map#putAll(java.util.Map)
*/
public UnicodeMap putAll(Map extends String, ? extends T> map) {
for (String key : map.keySet()) {
put(key,map.get(key));
}
return this;
}
/**
* Utility for extracting map
* @deprecated
*/
public UnicodeMap putAllIn(Map super String, ? super T> map) {
for (String key : keySet()) {
map.put(key, get(key));
}
return this;
}
/**
* Utility for extracting map
*/
public > U putAllInto(U map) {
for (EntryRange entry : entryRanges()) {
if (entry.string != null) {
break;
}
for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) {
map.put(UTF16.valueOf(cp), entry.value);
}
}
map.putAll(stringMap);
return map;
}
/**
* Utility for extracting map
*/
public > U putAllCodepointsInto(U map) {
for (EntryRange entry : entryRanges()) {
if (entry.string != null) {
break;
}
for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) {
map.put(cp, entry.value);
}
}
return map;
}
/* (non-Javadoc)
* @see java.util.Map#remove(java.lang.Object)
*/
public UnicodeMap remove(String key) {
return put(key, null);
}
/* (non-Javadoc)
* @see java.util.Map#remove(java.lang.Object)
*/
public UnicodeMap remove(int key) {
return put(key, null);
}
/* (non-Javadoc)
* @see java.util.Map#size()
*/
public int size() {
int result = stringMap == null ? 0 : stringMap.size();
for (int i = 0; i < length-1; ++i) {
T value = values[i];
if (value == null) continue;
result += transitions[i+1] - transitions[i];
}
return result;
}
/* (non-Javadoc)
* @see java.util.Map#entrySet()
*/
public Iterable> entrySet() {
return new EntrySetX();
}
private class EntrySetX implements Iterable> {
public Iterator> iterator() {
return new IteratorX();
}
public String toString() {
StringBuffer b = new StringBuffer();
for (Iterator it = iterator(); it.hasNext();) {
Object item = it.next();
b.append(item.toString()).append(' ');
}
return b.toString();
}
}
private class IteratorX implements Iterator> {
Iterator iterator = keySet().iterator();
/* (non-Javadoc)
* @see java.util.Iterator#hasNext()
*/
public boolean hasNext() {
return iterator.hasNext();
}
/* (non-Javadoc)
* @see java.util.Iterator#next()
*/
public Entry next() {
String key = iterator.next();
return new ImmutableEntry(key, get(key));
}
/* (non-Javadoc)
* @see java.util.Iterator#remove()
*/
public void remove() {
throw new UnsupportedOperationException();
}
}
/**
* Struct-like class used to iterate over a UnicodeMap in a for loop.
* If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null;
* Caution: The contents may change during the iteration!
*/
public static class EntryRange {
public int codepoint;
public int codepointEnd;
public String string;
public T value;
@Override
public String toString() {
return (string != null ? Utility.hex(string)
: Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd)))
+ "=" + value;
}
}
/**
* Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps.
* Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration!
* The value is guaranteed never to be null. The entryRange.string values (non-null) are after all the ranges.
* @return entry range, for for loops
*/
public Iterable> entryRanges() {
return new EntryRanges();
}
private class EntryRanges implements Iterable>, Iterator> {
private int pos;
private EntryRange result = new EntryRange();
private int lastRealRange = values[length-2] == null ? length - 2 : length - 1;
private Iterator> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator();
public Iterator> iterator() {
return this;
}
public boolean hasNext() {
return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext());
}
public EntryRange next() {
// a range may be null, but then the next one must not be (except the final range)
if (pos < lastRealRange) {
T temp = values[pos];
if (temp == null) {
temp = values[++pos];
}
result.codepoint = transitions[pos];
result.codepointEnd = transitions[pos+1]-1;
result.string = null;
result.value = temp;
++pos;
} else {
Entry entry = stringIterator.next();
result.codepoint = result.codepointEnd = -1;
result.string = entry.getKey();
result.value = entry.getValue();
}
return result;
}
public void remove() {
throw new UnsupportedOperationException();
}
}
/* (non-Javadoc)
* @see java.lang.Iterable#iterator()
*/
public Iterator iterator() {
return keySet().iterator();
}
/**
* Old form for compatibility
*/
public T getValue(String key) {
return get(key);
}
/**
* Old form for compatibility
*/
public T getValue(int key) {
// TODO Auto-generated method stub
return get(key);
}
/**
* Old form for compatibility
*/
public Collection getAvailableValues() {
return values();
}
/**
* Old form for compatibility
*/
public > U getAvailableValues(U result) {
return values(result);
}
/**
* Old form for compatibility
*/
public UnicodeSet getSet(T value) {
return keySet(value);
}
/**
* Old form for compatibility
*/
public UnicodeSet getSet(T value, UnicodeSet result) {
return keySet(value, result);
}
// This is to support compressed serialization. It works; just commented out for now as we shift to Generics
// TODO Fix once generics are cleaned up.
// // TODO Fix to serialize more than just strings.
// // Only if all the items are strings will we do the following compression
// // Otherwise we'll just use Java Serialization, bulky as it is
// public void writeExternal(ObjectOutput out1) throws IOException {
// DataOutputCompressor sc = new DataOutputCompressor(out1);
// // if all objects are strings
// Collection availableVals = getAvailableValues();
// boolean allStrings = allAreString(availableVals);
// sc.writeBoolean(allStrings);
// Map object_index = new LinkedHashMap();
// if (allAreString(availableVals)) {
// sc.writeStringSet(new TreeSet(availableVals), object_index);
// } else {
// sc.writeCollection(availableVals, object_index);
// }
// sc.writeUInt(length);
// int lastTransition = -1;
// int lastValueNumber = 0;
// if (DEBUG_WRITE) System.out.println("Trans count: " + length);
// for (int i = 0; i < length; ++i) {
// int valueNumber = ((Integer)object_index.get(values[i])).intValue();
// if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber);
//
// int deltaTransition = transitions[i] - lastTransition;
// lastTransition = transitions[i];
// int deltaValueNumber = valueNumber - lastValueNumber;
// lastValueNumber = valueNumber;
//
// deltaValueNumber <<= 1; // make room for one bit
// boolean canCombine = deltaTransition == 1;
// if (canCombine) deltaValueNumber |= 1;
// sc.writeInt(deltaValueNumber);
// if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber);
// if (!canCombine) {
// sc.writeUInt(deltaTransition);
// if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
// }
// }
// sc.flush();
// }
//
// /**
// *
// */
// private boolean allAreString(Collection availableValues2) {
// //if (true) return false;
// for (Iterator it = availableValues2.iterator(); it.hasNext();) {
// if (!(it.next() instanceof String)) return false;
// }
// return true;
// }
//
// public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException {
// DataInputCompressor sc = new DataInputCompressor(in1);
// boolean allStrings = sc.readBoolean();
// T[] valuesList;
// availableValues = new LinkedHashSet();
// if (allStrings) {
// valuesList = sc.readStringSet(availableValues);
// } else {
// valuesList = sc.readCollection(availableValues);
// }
// length = sc.readUInt();
// transitions = new int[length];
// if (DEBUG_WRITE) System.out.println("Trans count: " + length);
// values = (T[]) new Object[length];
// int currentTransition = -1;
// int currentValue = 0;
// int deltaTransition;
// for (int i = 0; i < length; ++i) {
// int temp = sc.readInt();
// if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp);
// boolean combined = (temp & 1) != 0;
// temp >>= 1;
// values[i] = valuesList[currentValue += temp];
// if (!combined) {
// deltaTransition = sc.readUInt();
// if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
// } else {
// deltaTransition = 1;
// }
// transitions[i] = currentTransition += deltaTransition; // delta value
// if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue);
// }
// }
public final UnicodeMap removeAll(UnicodeSet set) {
return putAll(set, null);
}
public final UnicodeMap removeAll(UnicodeMap reference) {
return removeRetainAll(reference, true);
}
public final UnicodeMap retainAll(UnicodeSet set) {
UnicodeSet toNuke = new UnicodeSet();
// TODO Optimize
for (EntryRange ae : entryRanges()) {
if (ae.string != null) {
if (!set.contains(ae.string)) {
toNuke.add(ae.string);
}
} else {
for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
if (!set.contains(i)) {
toNuke.add(i);
}
}
}
}
return putAll(toNuke, null);
}
public final UnicodeMap retainAll(UnicodeMap reference) {
return removeRetainAll(reference, false);
}
private final UnicodeMap removeRetainAll(UnicodeMap reference, boolean remove) {
UnicodeSet toNuke = new UnicodeSet();
// TODO Optimize
for (EntryRange ae : entryRanges()) {
if (ae.string != null) {
if (ae.value.equals(reference.get(ae.string)) == remove) {
toNuke.add(ae.string);
}
} else {
for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
if (ae.value.equals(reference.get(i)) == remove) {
toNuke.add(i);
}
}
}
}
return putAll(toNuke, null);
}
/**
* Returns the keys that consist of multiple code points.
* @return
*/
public final Set stringKeys() {
return getNonRangeStrings();
}
/**
* Gets the inverse of this map, adding to the target. Like putAllIn
* @return
*/
public > U addInverseTo(U target) {
for (T value : values()) {
UnicodeSet uset = getSet(value);
target.put(value, uset);
}
return target;
}
/**
* Freeze an inverse map.
* @param target
* @return
*/
public static Map freeze(Map target) {
for (UnicodeSet entry : target.values()) {
entry.freeze();
}
return Collections.unmodifiableMap(target);
}
/**
* @param target
* @return
*/
public UnicodeMap putAllInverse(Map source) {
for (Entry entry : source.entrySet()) {
putAll(entry.getValue(), entry.getKey());
}
return this;
}
}