All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.penguin.korean.util.CharArrayMap Maven / Gradle / Ivy

There is a newer version: 4.4.4
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ===========================
 *
 * This is an adapted copy of org.apache.lucene.analysis.util.CharArrayMap
 * to avoid unnecessary conflicts.
 */

package com.twitter.penguin.korean.util;

import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * A simple class that stores key Strings as char[]'s in a
 * hash table. Note that this is not a general purpose
 * class.  For example, it cannot remove items from the
 * map, nor does it resize its hash table to be smaller,
 * etc.  It is designed to be quick to retrieve items
 * by char[] keys without the necessity of converting
 * to a String first.
 */
public class CharArrayMap extends AbstractMap {
  // private only because missing generics
  private static final CharArrayMap EMPTY_MAP = new EmptyCharArrayMap<>();

  private final static int INIT_SIZE = 8;
  private final CharacterUtils charUtils;
  private boolean ignoreCase;
  private int count;
  char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
  V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator

  /**
   * Create map with enough capacity to hold startSize terms
   *
   * @param startSize  the initial capacity
   * @param ignoreCase false if and only if the set should be case sensitive
   *                   otherwise true.
   */
  public CharArrayMap(int startSize, boolean ignoreCase) {
    this.ignoreCase = ignoreCase;
    int size = INIT_SIZE;
    while (startSize + (startSize >> 2) > size)
      size <<= 1;
    keys = new char[size][];
    values = (V[]) new Object[size];
    this.charUtils = CharacterUtils.getInstance();
  }

  /**
   * Creates a map from the mappings in another map.
   *
   * @param c          a map whose mappings to be copied
   * @param ignoreCase false if and only if the set should be case sensitive
   *                   otherwise true.
   */
  public CharArrayMap(Map c, boolean ignoreCase) {
    this(c.size(), ignoreCase);
    putAll(c);
  }

  /**
   * Create set from the supplied map (used internally for readonly maps...)
   */
  private CharArrayMap(CharArrayMap toCopy) {
    this.keys = toCopy.keys;
    this.values = toCopy.values;
    this.ignoreCase = toCopy.ignoreCase;
    this.count = toCopy.count;
    this.charUtils = toCopy.charUtils;
  }

  /**
   * Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}.
   */
  @Override
  public void clear() {
    count = 0;
    Arrays.fill(keys, null);
    Arrays.fill(values, null);
  }

  /**
   * true if the len chars of text starting at off
   * are in the {@link #keySet()}
   */
  public boolean containsKey(char[] text, int off, int len) {
    return keys[getSlot(text, off, len)] != null;
  }

  /**
   * true if the CharSequence is in the {@link #keySet()}
   */
  public boolean containsKey(CharSequence cs) {
    return keys[getSlot(cs)] != null;
  }

  @Override
  public boolean containsKey(Object o) {
    if (o instanceof char[]) {
      final char[] text = (char[]) o;
      return containsKey(text, 0, text.length);
    }
    return containsKey(o.toString());
  }

  /**
   * returns the value of the mapping of len chars of text
   * starting at off
   */
  public V get(char[] text, int off, int len) {
    return values[getSlot(text, off, len)];
  }

  /**
   * returns the value of the mapping of the chars inside this {@code CharSequence}
   */
  public V get(CharSequence cs) {
    return values[getSlot(cs)];
  }

  @Override
  public V get(Object o) {
    if (o instanceof char[]) {
      final char[] text = (char[]) o;
      return get(text, 0, text.length);
    }
    return get(o.toString());
  }

  private int getSlot(char[] text, int off, int len) {
    int code = getHashCode(text, off, len);
    int pos = code & (keys.length - 1);
    char[] text2 = keys[pos];
    if (text2 != null && !equals(text, off, len, text2)) {
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (keys.length - 1);
        text2 = keys[pos];
      } while (text2 != null && !equals(text, off, len, text2));
    }
    return pos;
  }

  /**
   * Returns true if the String is in the set
   */
  private int getSlot(CharSequence text) {
    int code = getHashCode(text);
    int pos = code & (keys.length - 1);
    char[] text2 = keys[pos];
    if (text2 != null && !equals(text, text2)) {
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (keys.length - 1);
        text2 = keys[pos];
      } while (text2 != null && !equals(text, text2));
    }
    return pos;
  }

  /**
   * Add the given mapping.
   */
  public V put(CharSequence text, V value) {
    return put(text.toString(), value); // could be more efficient
  }

  @Override
  public V put(Object o, V value) {
    if (o instanceof char[]) {
      return put((char[]) o, value);
    }
    return put(o.toString(), value);
  }

  /**
   * Add the given mapping.
   */
  public V put(String text, V value) {
    return put(text.toCharArray(), value);
  }

  /**
   * Add the given mapping.
   * If ignoreCase is true for this Set, the text array will be directly modified.
   * The user should never modify this text array after calling this method.
   */
  public V put(char[] text, V value) {
    if (ignoreCase) {
      charUtils.toLowerCase(text, 0, text.length);
    }
    int slot = getSlot(text, 0, text.length);
    if (keys[slot] != null) {
      final V oldValue = values[slot];
      values[slot] = value;
      return oldValue;
    }
    keys[slot] = text;
    values[slot] = value;
    count++;

    if (count + (count >> 2) > keys.length) {
      rehash();
    }

    return null;
  }

  @SuppressWarnings("unchecked")
  private void rehash() {
    assert keys.length == values.length;
    final int newSize = 2 * keys.length;
    final char[][] oldkeys = keys;
    final V[] oldvalues = values;
    keys = new char[newSize][];
    values = (V[]) new Object[newSize];

    for (int i = 0; i < oldkeys.length; i++) {
      char[] text = oldkeys[i];
      if (text != null) {
        // todo: could be faster... no need to compare strings on collision
        final int slot = getSlot(text, 0, text.length);
        keys[slot] = text;
        values[slot] = oldvalues[i];
      }
    }
  }

  private boolean equals(char[] text1, int off, int len, char[] text2) {
    if (len != text2.length)
      return false;
    final int limit = off + len;
    if (ignoreCase) {
      for (int i = 0; i < len; ) {
        final int codePointAt = charUtils.codePointAt(text1, off + i, limit);
        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
          return false;
        i += Character.charCount(codePointAt);
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1[off + i] != text2[i])
          return false;
      }
    }
    return true;
  }

  private boolean equals(CharSequence text1, char[] text2) {
    int len = text1.length();
    if (len != text2.length)
      return false;
    if (ignoreCase) {
      for (int i = 0; i < len; ) {
        final int codePointAt = charUtils.codePointAt(text1, i);
        if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
          return false;
        i += Character.charCount(codePointAt);
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1.charAt(i) != text2[i])
          return false;
      }
    }
    return true;
  }

  private int getHashCode(char[] text, int offset, int len) {
    if (text == null)
      throw new NullPointerException();
    int code = 0;
    final int stop = offset + len;
    if (ignoreCase) {
      for (int i = offset; i < stop; ) {
        final int codePointAt = charUtils.codePointAt(text, i, stop);
        code = code * 31 + Character.toLowerCase(codePointAt);
        i += Character.charCount(codePointAt);
      }
    } else {
      for (int i = offset; i < stop; i++) {
        code = code * 31 + text[i];
      }
    }
    return code;
  }

  private int getHashCode(CharSequence text) {
    if (text == null)
      throw new NullPointerException();
    int code = 0;
    int len = text.length();
    if (ignoreCase) {
      for (int i = 0; i < len; ) {
        int codePointAt = charUtils.codePointAt(text, i);
        code = code * 31 + Character.toLowerCase(codePointAt);
        i += Character.charCount(codePointAt);
      }
    } else {
      for (int i = 0; i < len; i++) {
        code = code * 31 + text.charAt(i);
      }
    }
    return code;
  }

  @Override
  public V remove(Object key) {
    throw new UnsupportedOperationException();
  }

  @Override
  public int size() {
    return count;
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("{");
    for (Map.Entry entry : entrySet()) {
      if (sb.length() > 1) sb.append(", ");
      sb.append(entry);
    }
    return sb.append('}').toString();
  }

  private EntrySet entrySet = null;
  private CharArraySet keySet = null;

  EntrySet createEntrySet() {
    return new EntrySet(true);
  }

  @Override
  public final EntrySet entrySet() {
    if (entrySet == null) {
      entrySet = createEntrySet();
    }
    return entrySet;
  }

  // helper for CharArraySet to not produce endless recursion
  final Set originalKeySet() {
    return super.keySet();
  }

  /**
   * Returns an {@link CharArraySet} view on the map's keys.
   * The set will use the same {@code matchVersion} as this map.
   */
  @Override
  @SuppressWarnings({"unchecked", "rawtypes"})
  public final CharArraySet keySet() {
    if (keySet == null) {
      // prevent adding of entries
      keySet = new CharArraySet((CharArrayMap) this) {
        @Override
        public boolean add(Object o) {
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean add(CharSequence text) {
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean add(String text) {
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean add(char[] text) {
          throw new UnsupportedOperationException();
        }
      };
    }
    return keySet;
  }

  /**
   * public iterator class so efficient methods are exposed to users
   */
  public class EntryIterator implements Iterator> {
    private int pos = -1;
    private int lastPos;
    private final boolean allowModify;

    private EntryIterator(boolean allowModify) {
      this.allowModify = allowModify;
      goNext();
    }

    private void goNext() {
      lastPos = pos;
      pos++;
      while (pos < keys.length && keys[pos] == null) pos++;
    }

    @Override
    public boolean hasNext() {
      return pos < keys.length;
    }

    /**
     * gets the next key... do not modify the returned char[]
     */
    public char[] nextKey() {
      goNext();
      return keys[lastPos];
    }

    /**
     * gets the next key as a newly created String object
     */
    public String nextKeyString() {
      return new String(nextKey());
    }

    /**
     * returns the value associated with the last key returned
     */
    public V currentValue() {
      return values[lastPos];
    }

    /**
     * sets the value associated with the last key returned
     */
    public V setValue(V value) {
      if (!allowModify)
        throw new UnsupportedOperationException();
      V old = values[lastPos];
      values[lastPos] = value;
      return old;
    }

    /**
     * use nextCharArray() + currentValue() for better efficiency.
     */
    @Override
    public Map.Entry next() {
      goNext();
      return new MapEntry(lastPos, allowModify);
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  private final class MapEntry implements Map.Entry {
    private final int pos;
    private final boolean allowModify;

    private MapEntry(int pos, boolean allowModify) {
      this.pos = pos;
      this.allowModify = allowModify;
    }

    @Override
    public Object getKey() {
      // we must clone here, as putAll to another CharArrayMap
      // with other case sensitivity flag would corrupt the keys
      return keys[pos].clone();
    }

    @Override
    public V getValue() {
      return values[pos];
    }

    @Override
    public V setValue(V value) {
      if (!allowModify)
        throw new UnsupportedOperationException();
      final V old = values[pos];
      values[pos] = value;
      return old;
    }

    @Override
    public String toString() {
      return new StringBuilder().append(keys[pos]).append('=')
          .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
          .toString();
    }
  }

  /**
   * public EntrySet class so efficient methods are exposed to users
   */
  public final class EntrySet extends AbstractSet> {
    private final boolean allowModify;

    private EntrySet(boolean allowModify) {
      this.allowModify = allowModify;
    }

    @Override
    public EntryIterator iterator() {
      return new EntryIterator(allowModify);
    }

    @Override
    @SuppressWarnings("unchecked")
    public boolean contains(Object o) {
      if (!(o instanceof Map.Entry))
        return false;
      final Map.Entry e = (Map.Entry) o;
      final Object key = e.getKey();
      final Object val = e.getValue();
      final Object v = get(key);
      return v == null ? val == null : v.equals(val);
    }

    @Override
    public boolean remove(Object o) {
      throw new UnsupportedOperationException();
    }

    @Override
    public int size() {
      return count;
    }

    @Override
    public void clear() {
      if (!allowModify)
        throw new UnsupportedOperationException();
      CharArrayMap.this.clear();
    }
  }

  /**
   * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
   * unmodifiable views of internal map for "read-only" use.
   *
   * @param map a map for which the unmodifiable map is returned.
   * @return an new unmodifiable {@link CharArrayMap}.
   * @throws NullPointerException if the given map is null.
   */
  public static  CharArrayMap unmodifiableMap(CharArrayMap map) {
    if (map == null)
      throw new NullPointerException("Given map is null");
    if (map == emptyMap() || map.isEmpty())
      return emptyMap();
    if (map instanceof UnmodifiableCharArrayMap)
      return map;
    return new UnmodifiableCharArrayMap<>(map);
  }

  /**
   * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
   * is a {@link CharArrayMap} the ignoreCase property will be preserved.
   *
   * @param map a map to copy
   * @return a copy of the given map as a {@link CharArrayMap}. If the given map
   * is a {@link CharArrayMap} the ignoreCase property as well as the
   * matchVersion will be of the given map will be preserved.
   */
  public static  CharArrayMap copy(final Map map) {
    return copy(map);
  }

  /**
   * Returns an empty, unmodifiable map.
   */
  @SuppressWarnings("unchecked")
  public static  CharArrayMap emptyMap() {
    return (CharArrayMap) EMPTY_MAP;
  }

  // package private CharArraySet instanceof check in CharArraySet
  static class UnmodifiableCharArrayMap extends CharArrayMap {

    UnmodifiableCharArrayMap(CharArrayMap map) {
      super(map);
    }

    @Override
    public void clear() {
      throw new UnsupportedOperationException();
    }

    @Override
    public V put(Object o, V val) {
      throw new UnsupportedOperationException();
    }

    @Override
    public V put(char[] text, V val) {
      throw new UnsupportedOperationException();
    }

    @Override
    public V put(CharSequence text, V val) {
      throw new UnsupportedOperationException();
    }

    @Override
    public V put(String text, V val) {
      throw new UnsupportedOperationException();
    }

    @Override
    public V remove(Object key) {
      throw new UnsupportedOperationException();
    }

    @Override
    EntrySet createEntrySet() {
      return new EntrySet(false);
    }
  }

  /**
   * Empty {@link org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap} optimized for speed.
   * Contains checks will always return false or throw
   * NPE if necessary.
   */
  private static final class EmptyCharArrayMap extends UnmodifiableCharArrayMap {
    EmptyCharArrayMap() {
      super(new CharArrayMap(0, false));
    }

    @Override
    public boolean containsKey(char[] text, int off, int len) {
      if (text == null)
        throw new NullPointerException();
      return false;
    }

    @Override
    public boolean containsKey(CharSequence cs) {
      if (cs == null)
        throw new NullPointerException();
      return false;
    }

    @Override
    public boolean containsKey(Object o) {
      if (o == null)
        throw new NullPointerException();
      return false;
    }

    @Override
    public V get(char[] text, int off, int len) {
      if (text == null)
        throw new NullPointerException();
      return null;
    }

    @Override
    public V get(CharSequence cs) {
      if (cs == null)
        throw new NullPointerException();
      return null;
    }

    @Override
    public V get(Object o) {
      if (o == null)
        throw new NullPointerException();
      return null;
    }
  }
}