All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.UnicodeMap Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

The newest version!
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import com.ibm.icu.text.StringTransform;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.Freezable;

/**
 * Class for mapping Unicode characters and strings to values, optimized for single code points, 
 * where ranges of code points have the same value.
 * Much smaller storage than using HashMap, and much faster and more compact than
 * a list of UnicodeSets. The API design mimics Map but can't extend it due to some
 * necessary changes (much as UnicodeSet mimics Set). Note that nulls are not permitted as values;
 * that is, a put(x,null) is the same as remove(x).
* At this point "" is also not allowed as a key, although that may change. * @author markdavis * * @internal CLDR */ public final class UnicodeMap implements Cloneable, Freezable>, StringTransform, Iterable { /** * For serialization */ //private static final long serialVersionUID = -6540936876295804105L; static final boolean ASSERTIONS = false; static final long GROWTH_PERCENT = 200; // 100 is no growth! static final long GROWTH_GAP = 10; // extra bump! private int length; // two parallel arrays to save memory. Wish Java had structs. private int[] transitions; /* package private */ T[] values; private LinkedHashSet availableValues = new LinkedHashSet(); private transient boolean staleAvailableValues; private transient boolean errorOnReset; private volatile transient boolean locked; private int lastIndex; private TreeMap stringMap; { clear(); } public UnicodeMap() { } public UnicodeMap(UnicodeMap other) { this.putAll(other); } public UnicodeMap clear() { if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } length = 2; transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0}; values = (T[]) new Object[10]; availableValues.clear(); staleAvailableValues = false; errorOnReset = false; lastIndex = 0; stringMap = null; return this; } /* Boilerplate */ public boolean equals(Object other) { if (other == null) return false; try { UnicodeMap that = (UnicodeMap) other; if (length != that.length) return false; for (int i = 0; i < length-1; ++i) { if (transitions[i] != that.transitions[i]) return false; if (!areEqual(values[i], that.values[i])) return false; } return true; } catch (ClassCastException e) { return false; } } public static boolean areEqual(Object a , Object b) { if (a == b) return true; if (a == null || b == null) return false; return a.equals(b); } public int hashCode() { int result = length; // TODO might want to abbreviate this for speed. for (int i = 0; i < length-1; ++i) { result = 37*result + transitions[i]; result = 37*result; if (values[i] != null) { result += values[i].hashCode(); } } if (stringMap != null) { result = 37*result + stringMap.hashCode(); } return result; } /** * Standard clone. Warning, as with Collections, does not do deep clone. */ public UnicodeMap cloneAsThawed() { UnicodeMap that = new UnicodeMap(); that.length = length; that.transitions = (int[]) transitions.clone(); that.values = (T[]) values.clone(); that.availableValues = new LinkedHashSet(availableValues); that.locked = false; that.stringMap = stringMap == null ? null : (TreeMap) stringMap.clone(); return that; } /* for internal consistency checking */ void _checkInvariants() { if (length < 2 || length > transitions.length || transitions.length != values.length) { throw new IllegalArgumentException("Invariant failed: Lengths bad"); } for (int i = 1; i < length-1; ++i) { if (areEqual(values[i-1], values[i])) { throw new IllegalArgumentException("Invariant failed: values shared at " + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">" + "\t" + Utility.hex(i) + ": <" + values[i] + ">" ); } } if (transitions[0] != 0 || transitions[length-1] != 0x110000) { throw new IllegalArgumentException("Invariant failed: bounds set wrong"); } for (int i = 1; i < length-1; ++i) { if (transitions[i-1] >= transitions[i]) { throw new IllegalArgumentException("Invariant failed: not monotonic" + "\t" + Utility.hex(i-1) + ": " + transitions[i-1] + "\t" + Utility.hex(i) + ": " + transitions[i] ); } } } /** * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1] * Assumes that 0 <= codepoint <= 0x10FFFF * @param codepoint * @return the index */ private int _findIndex(int c) { int lo = 0; int hi = length - 1; int i = (lo + hi) >>> 1; // invariant: c >= list[lo] // invariant: c < list[hi] while (i != lo) { if (c < transitions[i]) { hi = i; } else { lo = i; } i = (lo + hi) >>> 1; } if (ASSERTIONS) _checkFind(c, lo); return lo; } private void _checkFind(int codepoint, int value) { int other = __findIndex(codepoint); if (other != value) { throw new IllegalArgumentException("Invariant failed: binary search" + "\t" + Utility.hex(codepoint) + ": " + value + "\tshould be: " + other); } } private int __findIndex(int codepoint) { for (int i = length-1; i > 0; --i) { if (transitions[i] <= codepoint) return i; } return 0; } /* * Try indexed lookup static final int SHIFT = 8; int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found boolean startsValid = false; private int findIndex(int codepoint) { if (!startsValid) { int start = 0; for (int i = 1; i < length; ++i) { } } for (int i = length-1; i > 0; --i) { if (transitions[i] <= codepoint) return i; } return 0; } */ /** * Remove the items from index through index+count-1. * Logically reduces the size of the internal arrays. * @param index * @param count */ private void _removeAt(int index, int count) { for (int i = index + count; i < length; ++i) { transitions[i-count] = transitions[i]; values[i-count] = values[i]; } length -= count; } /** * Add a gap from index to index+count-1. * The values there are undefined, and must be set. * Logically grows arrays to accommodate. Actual growth is limited * @param index * @param count */ private void _insertGapAt(int index, int count) { int newLength = length + count; int[] oldtransitions = transitions; T[] oldvalues = values; if (newLength > transitions.length) { int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100); transitions = new int[allocation]; values = (T[]) new Object[allocation]; for (int i = 0; i < index; ++i) { transitions[i] = oldtransitions[i]; values[i] = oldvalues[i]; } } for (int i = length - 1; i >= index; --i) { transitions[i+count] = oldtransitions[i]; values[i+count] = oldvalues[i]; } length = newLength; } /** * Associates code point with value. Removes any previous association. * All code that calls this MUST check for frozen first! * @param codepoint * @param value * @return this, for chaining */ private UnicodeMap _put(int codepoint, T value) { // Warning: baseIndex is an invariant; must // be defined such that transitions[baseIndex] < codepoint // at end of this routine. int baseIndex; if (transitions[lastIndex] <= codepoint && codepoint < transitions[lastIndex+1]) { baseIndex = lastIndex; } else { baseIndex = _findIndex(codepoint); } int limitIndex = baseIndex + 1; // cases are (a) value is already set if (areEqual(values[baseIndex], value)) return this; if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } if (errorOnReset && values[baseIndex] != null) { throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint) + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value); } // adjust the available values staleAvailableValues = true; availableValues.add(value); // add if not there already int baseCP = transitions[baseIndex]; int limitCP = transitions[limitIndex]; // we now start walking through the difference case, // based on whether we are at the start or end of range // and whether the range is a single character or multiple if (baseCP == codepoint) { // CASE: At very start of range boolean connectsWithPrevious = baseIndex != 0 && areEqual(value, values[baseIndex-1]); if (limitCP == codepoint + 1) { // CASE: Single codepoint range boolean connectsWithFollowing = baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 if (connectsWithPrevious) { // A1a connects with previous & following, so remove index if (connectsWithFollowing) { _removeAt(baseIndex, 2); } else { _removeAt(baseIndex, 1); // extend previous } --baseIndex; // fix up } else if (connectsWithFollowing) { _removeAt(baseIndex, 1); // extend following backwards transitions[baseIndex] = codepoint; } else { // doesn't connect on either side, just reset values[baseIndex] = value; } } else if (connectsWithPrevious) { // A.1: start of multi codepoint range // if connects ++transitions[baseIndex]; // extend previous } else { // otherwise insert new transition transitions[baseIndex] = codepoint+1; // fix following range _insertGapAt(baseIndex, 1); values[baseIndex] = value; transitions[baseIndex] = codepoint; } } else if (limitCP == codepoint + 1) { // CASE: at end of range // if connects, just back up range boolean connectsWithFollowing = baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 if (connectsWithFollowing) { --transitions[limitIndex]; return this; } else { _insertGapAt(limitIndex, 1); transitions[limitIndex] = codepoint; values[limitIndex] = value; } } else { // CASE: in middle of range // insert gap, then set the new range _insertGapAt(++baseIndex,2); transitions[baseIndex] = codepoint; values[baseIndex] = value; transitions[baseIndex+1] = codepoint + 1; values[baseIndex+1] = values[baseIndex-1]; // copy lower range values } lastIndex = baseIndex; // store for next time return this; } private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) { // TODO optimize for (int i = startCodePoint; i <= endCodePoint; ++i) { _put(i, value); if (ASSERTIONS) _checkInvariants(); } return this; } /** * Sets the codepoint value. * @param codepoint * @param value * @return this (for chaining) */ public UnicodeMap put(int codepoint, T value) { if (codepoint < 0 || codepoint > 0x10FFFF) { throw new IllegalArgumentException("Codepoint out of range: " + codepoint); } _put(codepoint, value); if (ASSERTIONS) _checkInvariants(); return this; } /** * Sets the codepoint value. * @param codepoint * @param value * @return this (for chaining) */ public UnicodeMap put(String string, T value) { int v = UnicodeSet.getSingleCodePoint(string); if (v == Integer.MAX_VALUE) { if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } if (value != null) { if (stringMap == null) { stringMap = new TreeMap(); } stringMap.put(string, value); staleAvailableValues = true; } else if (stringMap != null) { if (stringMap.remove(string) != null) { staleAvailableValues = true; } } return this; } return put(v, value); } /** * Adds bunch o' codepoints; otherwise like put. * @param codepoints * @param value * @return this (for chaining) */ public UnicodeMap putAll(UnicodeSet codepoints, T value) { UnicodeSetIterator it = new UnicodeSetIterator(codepoints); while (it.nextRange()) { if (it.string == null) { _putAll(it.codepoint, it.codepointEnd, value); } else { put(it.string, value); } } return this; } /** * Adds bunch o' codepoints; otherwise like add. * @param startCodePoint * @param endCodePoint * @param value * @return this (for chaining) */ public UnicodeMap putAll(int startCodePoint, int endCodePoint, T value) { if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } if (startCodePoint < 0 || endCodePoint > 0x10FFFF) { throw new IllegalArgumentException("Codepoint out of range: " + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint)); } return _putAll(startCodePoint, endCodePoint, value); } /** * Add all the (main) values from a UnicodeMap * @param unicodeMap the property to add to the map * @return this (for chaining) */ public UnicodeMap putAll(UnicodeMap unicodeMap) { for (int i = 0; i < unicodeMap.length; ++i) { T value = unicodeMap.values[i]; if (value != null) { _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value); } if (ASSERTIONS) _checkInvariants(); } if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) { if (stringMap == null) { stringMap = new TreeMap(); } stringMap.putAll(unicodeMap.stringMap); } return this; } /** * Add all the (main) values from a Unicode property * @param prop the property to add to the map * @return this (for chaining) */ public UnicodeMap putAllFiltered(UnicodeMap prop, UnicodeSet filter) { // TODO optimize for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) { if (it.codepoint != UnicodeSetIterator.IS_STRING) { T value = prop.getValue(it.codepoint); if (value != null) { _put(it.codepoint, value); } } } // now do the strings for (String key : filter.strings()) { T value = prop.get(key); if (value != null) { put(key, value); } } return this; } /** * Set the currently unmapped Unicode code points to the given value. * @param value the value to set * @return this (for chaining) */ public UnicodeMap setMissing(T value) { // fast path, if value not yet present if (!getAvailableValues().contains(value)) { staleAvailableValues = true; availableValues.add(value); for (int i = 0; i < length; ++i) { if (values[i] == null) values[i] = value; } return this; } else { return putAll(keySet(null), value); } } /** * Returns the keyset consisting of all the keys that would produce the given value. Deposits into * result if it is not null. Remember to clear if you just want * the new values. */ public UnicodeSet keySet(T value, UnicodeSet result) { if (result == null) result = new UnicodeSet(); for (int i = 0; i < length - 1; ++i) { if (areEqual(value, values[i])) { result.add(transitions[i], transitions[i+1]-1); } } if (value != null && stringMap != null) { for (String key : stringMap.keySet()) { T newValue = stringMap.get(key); if (value.equals(newValue)) { result.add((String)key); } } } return result; } /** * Returns the keyset consisting of all the keys that would produce the given value. * the new values. */ public UnicodeSet keySet(T value) { return keySet(value,null); } /** * Returns the keyset consisting of all the keys that would produce (non-null) values. */ public UnicodeSet keySet() { UnicodeSet result = new UnicodeSet(); for (int i = 0; i < length - 1; ++i) { if (values[i] != null) { result.add(transitions[i], transitions[i+1]-1); } } if (stringMap != null) { result.addAll(stringMap.keySet()); } return result; } /** * Returns the list of possible values. Deposits each non-null value into * result. Creates result if it is null. Remember to clear result if * you are not appending to existing collection. * @param result * @return result */ public > U values(U result) { if (staleAvailableValues) { // collect all the current values // retain them in the availableValues Set temp = new HashSet(); for (int i = 0; i < length - 1; ++i) { if (values[i] != null) temp.add(values[i]); } availableValues.retainAll(temp); if (stringMap != null) { availableValues.addAll(stringMap.values()); } staleAvailableValues = false; } if (result == null) { result = (U) new LinkedHashSet(availableValues.size()); } result.addAll(availableValues); return result; } /** * Convenience method */ public Set values() { return getAvailableValues(null); } /** * Gets the value associated with a given code point. * Returns null, if there is no such value. * @param codepoint * @return the value */ public T get(int codepoint) { if (codepoint < 0 || codepoint > 0x10FFFF) { throw new IllegalArgumentException("Codepoint out of range: " + codepoint); } return values[_findIndex(codepoint)]; } /** * Gets the value associated with a given code point. * Returns null, if there is no such value. * @param codepoint * @return the value */ public T get(String value) { if (UTF16.hasMoreCodePointsThan(value, 1)) { if (stringMap == null) { return null; } return stringMap.get(value); } return getValue(UTF16.charAt(value, 0)); } /** * Change a new string from the source string according to the mappings. * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString() * TODO: extend to strings * @param source * @return */ public String transform(String source) { StringBuffer result = new StringBuffer(); int cp; for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(source, i); T mResult = getValue(cp); if (mResult != null) { result.append(mResult); } else { UTF16.append(result, cp); } } return result.toString(); } /** * Used to add complex values, where the value isn't replaced but in some sense composed * @author markdavis */ public abstract static class Composer { /** * This will be called with either a string or a code point. The result is the new value for that item. * If the codepoint is used, the string is null; if the string is used, the codepoint is -1. * @param a * @param b */ public abstract T compose(int codePoint, String string, T a, T b); } public UnicodeMap composeWith(UnicodeMap other, Composer composer) { for (T value : other.getAvailableValues()) { UnicodeSet set = other.keySet(value); composeWith(set, value, composer); } return this; } public UnicodeMap composeWith(UnicodeSet set, T value, Composer composer) { for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { int i = it.codepoint; if (i == UnicodeSetIterator.IS_STRING) { String s = it.string; T v1 = getValue(s); T v3 = composer.compose(-1, s, v1, value); if (v1 != v3 && (v1 == null || !v1.equals(v3))) { put(s, v3); } } else { T v1 = getValue(i); T v3 = composer.compose(i, null, v1, value); if (v1 != v3 && (v1 == null || !v1.equals(v3))) { put(i, v3); } } } return this; } public String toString() { return toString(null); } public String toString(Comparator collected) { StringBuffer result = new StringBuffer(); if (collected == null) { for (int i = 0; i < length-1; ++i) { T value = values[i]; if (value == null) continue; int start = transitions[i]; int end = transitions[i+1]-1; result.append(Utility.hex(start)); if (start != end) result.append("-").append(Utility.hex(end)); result.append("=").append(value.toString()).append("\n"); } if (stringMap != null) { for (String s : stringMap.keySet()) { result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n"); } } } else { Set set = values(new TreeSet(collected)); for (Iterator it = set.iterator(); it.hasNext();) { T value = it.next(); UnicodeSet s = keySet(value); result.append(value).append("=").append(s.toString()).append("\n"); } } return result.toString(); } /** * @return Returns the errorOnReset value. */ public boolean getErrorOnReset() { return errorOnReset; } /** * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception. * @param errorOnReset The errorOnReset to set. */ public UnicodeMap setErrorOnReset(boolean errorOnReset) { this.errorOnReset = errorOnReset; return this; } /* (non-Javadoc) * @see com.ibm.icu.dev.test.util.Freezable#isFrozen() */ public boolean isFrozen() { // TODO Auto-generated method stub return locked; } /* (non-Javadoc) * @see com.ibm.icu.dev.test.util.Freezable#lock() */ public UnicodeMap freeze() { locked = true; return this; } /** * Utility to find the maximal common prefix of two strings. * TODO: fix supplemental support */ static public int findCommonPrefix(String last, String s) { int minLen = Math.min(last.length(), s.length()); for (int i = 0; i < minLen; ++i) { if (last.charAt(i) != s.charAt(i)) return i; } return minLen; } /** * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings(). */ public int getRangeCount() { return length-1; } /** * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. */ public int getRangeStart(int range) { return transitions[range]; } /** * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. */ public int getRangeEnd(int range) { return transitions[range+1] - 1; } /** * Get the value for the range. */ public T getRangeValue(int range) { return values[range]; } /** * Get the strings that are not in the ranges. Returns null if there are none. * @return */ public Set getNonRangeStrings() { if (stringMap == null || stringMap.isEmpty()) { return null; } return Collections.unmodifiableSet(stringMap.keySet()); } static final boolean DEBUG_WRITE = false; /* (non-Javadoc) * @see java.util.Map#containsKey(java.lang.Object) */ public boolean containsKey(String key) { return getValue(key) != null; } /* (non-Javadoc) * @see java.util.Map#containsKey(java.lang.Object) */ public boolean containsKey(int key) { return getValue(key) != null; } /* (non-Javadoc) * @see java.util.Map#containsValue(java.lang.Object) */ public boolean containsValue(T value) { // TODO Optimize return getAvailableValues().contains(value); } /* (non-Javadoc) * @see java.util.Map#isEmpty() */ public boolean isEmpty() { return size() == 0; } /* (non-Javadoc) * @see java.util.Map#putAll(java.util.Map) */ public UnicodeMap putAll(Map map) { for (String key : map.keySet()) { put(key,map.get(key)); } return this; } /** * Utility for extracting map * @deprecated */ public UnicodeMap putAllIn(Map map) { for (String key : keySet()) { map.put(key, get(key)); } return this; } /** * Utility for extracting map */ public > U putAllInto(U map) { for (EntryRange entry : entryRanges()) { if (entry.string != null) { break; } for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) { map.put(UTF16.valueOf(cp), entry.value); } } map.putAll(stringMap); return map; } /** * Utility for extracting map */ public > U putAllCodepointsInto(U map) { for (EntryRange entry : entryRanges()) { if (entry.string != null) { break; } for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) { map.put(cp, entry.value); } } return map; } /* (non-Javadoc) * @see java.util.Map#remove(java.lang.Object) */ public UnicodeMap remove(String key) { return put(key, null); } /* (non-Javadoc) * @see java.util.Map#remove(java.lang.Object) */ public UnicodeMap remove(int key) { return put(key, null); } /* (non-Javadoc) * @see java.util.Map#size() */ public int size() { int result = stringMap == null ? 0 : stringMap.size(); for (int i = 0; i < length-1; ++i) { T value = values[i]; if (value == null) continue; result += transitions[i+1] - transitions[i]; } return result; } /* (non-Javadoc) * @see java.util.Map#entrySet() */ public Iterable> entrySet() { return new EntrySetX(); } private class EntrySetX implements Iterable> { public Iterator> iterator() { return new IteratorX(); } public String toString() { StringBuffer b = new StringBuffer(); for (Iterator it = iterator(); it.hasNext();) { Object item = it.next(); b.append(item.toString()).append(' '); } return b.toString(); } } private class IteratorX implements Iterator> { Iterator iterator = keySet().iterator(); /* (non-Javadoc) * @see java.util.Iterator#hasNext() */ public boolean hasNext() { return iterator.hasNext(); } /* (non-Javadoc) * @see java.util.Iterator#next() */ public Entry next() { String key = iterator.next(); return new ImmutableEntry(key, get(key)); } /* (non-Javadoc) * @see java.util.Iterator#remove() */ public void remove() { throw new UnsupportedOperationException(); } } /** * Struct-like class used to iterate over a UnicodeMap in a for loop. * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null; * Caution: The contents may change during the iteration! */ public static class EntryRange { public int codepoint; public int codepointEnd; public String string; public T value; @Override public String toString() { return (string != null ? Utility.hex(string) : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd))) + "=" + value; } } /** * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps. * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration! * The value is guaranteed never to be null. The entryRange.string values (non-null) are after all the ranges. * @return entry range, for for loops */ public Iterable> entryRanges() { return new EntryRanges(); } private class EntryRanges implements Iterable>, Iterator> { private int pos; private EntryRange result = new EntryRange(); private int lastRealRange = values[length-2] == null ? length - 2 : length - 1; private Iterator> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator(); public Iterator> iterator() { return this; } public boolean hasNext() { return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext()); } public EntryRange next() { // a range may be null, but then the next one must not be (except the final range) if (pos < lastRealRange) { T temp = values[pos]; if (temp == null) { temp = values[++pos]; } result.codepoint = transitions[pos]; result.codepointEnd = transitions[pos+1]-1; result.string = null; result.value = temp; ++pos; } else { Entry entry = stringIterator.next(); result.codepoint = result.codepointEnd = -1; result.string = entry.getKey(); result.value = entry.getValue(); } return result; } public void remove() { throw new UnsupportedOperationException(); } } /* (non-Javadoc) * @see java.lang.Iterable#iterator() */ public Iterator iterator() { return keySet().iterator(); } /** * Old form for compatibility */ public T getValue(String key) { return get(key); } /** * Old form for compatibility */ public T getValue(int key) { // TODO Auto-generated method stub return get(key); } /** * Old form for compatibility */ public Collection getAvailableValues() { return values(); } /** * Old form for compatibility */ public > U getAvailableValues(U result) { return values(result); } /** * Old form for compatibility */ public UnicodeSet getSet(T value) { return keySet(value); } /** * Old form for compatibility */ public UnicodeSet getSet(T value, UnicodeSet result) { return keySet(value, result); } // This is to support compressed serialization. It works; just commented out for now as we shift to Generics // TODO Fix once generics are cleaned up. // // TODO Fix to serialize more than just strings. // // Only if all the items are strings will we do the following compression // // Otherwise we'll just use Java Serialization, bulky as it is // public void writeExternal(ObjectOutput out1) throws IOException { // DataOutputCompressor sc = new DataOutputCompressor(out1); // // if all objects are strings // Collection availableVals = getAvailableValues(); // boolean allStrings = allAreString(availableVals); // sc.writeBoolean(allStrings); // Map object_index = new LinkedHashMap(); // if (allAreString(availableVals)) { // sc.writeStringSet(new TreeSet(availableVals), object_index); // } else { // sc.writeCollection(availableVals, object_index); // } // sc.writeUInt(length); // int lastTransition = -1; // int lastValueNumber = 0; // if (DEBUG_WRITE) System.out.println("Trans count: " + length); // for (int i = 0; i < length; ++i) { // int valueNumber = ((Integer)object_index.get(values[i])).intValue(); // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber); // // int deltaTransition = transitions[i] - lastTransition; // lastTransition = transitions[i]; // int deltaValueNumber = valueNumber - lastValueNumber; // lastValueNumber = valueNumber; // // deltaValueNumber <<= 1; // make room for one bit // boolean canCombine = deltaTransition == 1; // if (canCombine) deltaValueNumber |= 1; // sc.writeInt(deltaValueNumber); // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber); // if (!canCombine) { // sc.writeUInt(deltaTransition); // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); // } // } // sc.flush(); // } // // /** // * // */ // private boolean allAreString(Collection availableValues2) { // //if (true) return false; // for (Iterator it = availableValues2.iterator(); it.hasNext();) { // if (!(it.next() instanceof String)) return false; // } // return true; // } // // public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException { // DataInputCompressor sc = new DataInputCompressor(in1); // boolean allStrings = sc.readBoolean(); // T[] valuesList; // availableValues = new LinkedHashSet(); // if (allStrings) { // valuesList = sc.readStringSet(availableValues); // } else { // valuesList = sc.readCollection(availableValues); // } // length = sc.readUInt(); // transitions = new int[length]; // if (DEBUG_WRITE) System.out.println("Trans count: " + length); // values = (T[]) new Object[length]; // int currentTransition = -1; // int currentValue = 0; // int deltaTransition; // for (int i = 0; i < length; ++i) { // int temp = sc.readInt(); // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp); // boolean combined = (temp & 1) != 0; // temp >>= 1; // values[i] = valuesList[currentValue += temp]; // if (!combined) { // deltaTransition = sc.readUInt(); // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); // } else { // deltaTransition = 1; // } // transitions[i] = currentTransition += deltaTransition; // delta value // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue); // } // } public final UnicodeMap removeAll(UnicodeSet set) { return putAll(set, null); } public final UnicodeMap removeAll(UnicodeMap reference) { return removeRetainAll(reference, true); } public final UnicodeMap retainAll(UnicodeSet set) { UnicodeSet toNuke = new UnicodeSet(); // TODO Optimize for (EntryRange ae : entryRanges()) { if (ae.string != null) { if (!set.contains(ae.string)) { toNuke.add(ae.string); } } else { for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { if (!set.contains(i)) { toNuke.add(i); } } } } return putAll(toNuke, null); } public final UnicodeMap retainAll(UnicodeMap reference) { return removeRetainAll(reference, false); } private final UnicodeMap removeRetainAll(UnicodeMap reference, boolean remove) { UnicodeSet toNuke = new UnicodeSet(); // TODO Optimize for (EntryRange ae : entryRanges()) { if (ae.string != null) { if (ae.value.equals(reference.get(ae.string)) == remove) { toNuke.add(ae.string); } } else { for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { if (ae.value.equals(reference.get(i)) == remove) { toNuke.add(i); } } } } return putAll(toNuke, null); } /** * Returns the keys that consist of multiple code points. * @return */ public final Set stringKeys() { return getNonRangeStrings(); } /** * Gets the inverse of this map, adding to the target. Like putAllIn * @return */ public > U addInverseTo(U target) { for (T value : values()) { UnicodeSet uset = getSet(value); target.put(value, uset); } return target; } /** * Freeze an inverse map. * @param target * @return */ public static Map freeze(Map target) { for (UnicodeSet entry : target.values()) { entry.freeze(); } return Collections.unmodifiableMap(target); } /** * @param target * @return */ public UnicodeMap putAllInverse(Map source) { for (Entry entry : source.entrySet()) { putAll(entry.getValue(), entry.getKey()); } return this; } }