All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.util.TFloatMap Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.util;

import static edu.berkeley.nlp.util.LogInfo.errors;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.AbstractCollection;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;


/**
 * Provides a map from objects to doubles.
 * Motivation: provides a specialized data structure for
 * mapping objects to doubles which is both fast and space efficient.
 * Feature 1:
 * You can switch between two representations of the map:
 *   - Sorted list (lookups involve binary search)
 *   - Hash table with linear probing (lookups involve hashing)
 * Feature 2:
 * Sometimes, we want several maps with the same set of keys.
 * If we lock the map, we can share the same keys between several
 * maps, which saves space.
 *
 * Note: in the sorted list, we first sort the keys by
 * hash code, and then for equal hash code, we sort by the objects
 * values.  We hope that hash code collisions will be rare enough
 * that we won't have to resort to comparing objects.
 *
 * Typical usage:
 *   - Construct a map using a hash table.
 *   - To save space, switch to a sorted list representation.
 *
 * Will get runtime exception if try to used sorted list and keys are not
 * comparable.
 *
 * TODO: support remove operation.
 */
public class TFloatMap extends AbstractTMap
    implements Iterable.Entry>, Serializable
{
  protected static final long serialVersionUID = 42;

  public TFloatMap() {
    this(AbstractTMap.defaultFunctionality, defaultExpectedSize);
  }

  public TFloatMap(Functionality keyFunc) {
    this(keyFunc, defaultExpectedSize);
  }

  public TFloatMap(int expectedSize) {
	  this(AbstractTMap.defaultFunctionality, expectedSize);
  }

  // If keys are locked, we can share the same keys.
  public TFloatMap(AbstractTMap map) {
    this(map.keyFunc);
    this.mapType = map.mapType;
    this.locked = map.locked;
    this.num = map.num;
    this.keys = map.locked ? map.keys : (T[])map.keys.clone(); // Share keys! CHECKED
    if(map instanceof TFloatMap)
      this.values = ((TFloatMap)map).values.clone();
    else
      this.values = new float[keys.length];
  }

  /**
   * expectedSize: expected number of entries we're going to have in the map.
   */
  public TFloatMap(Functionality keyFunc, int expectedSize) {
    this.keyFunc = keyFunc;
    this.mapType = MapType.HASH_TABLE;
    this.locked = false;
    this.num = 0;
    allocate(getCapacity(num, false));
    this.numCollisions = 0;
  }

  // Main operations
  public boolean containsKey(T key) {
    return find(key, false) != -1;
  }
  public float get(T key, float defaultValue)
	{
    int i = find(key, false);
    return i == -1 ? defaultValue : values[i];
  }
  public float getWithErrorMsg(T key, float defaultValue)
	{
    int i = find(key, false);
    if(i == -1) errors("%s not in map, using %f", key, defaultValue);
    return i == -1 ? defaultValue : values[i];
  }
  public float getSure(T key)
	{
    // Throw exception if key doesn't exist.
    int i = find(key, false);
    if(i == -1) throw new RuntimeException("Missing key: " + key);
    return values[i];
  }
  public void put(T key, float value)
	{
    assert !Double.isNaN(value);
    int i = find(key, true);
    keys[i] = key;
    values[i] = value;
  }
  public void put(T key, float value, boolean keepHigher)
	{
    assert !Double.isNaN(value);
    int i = find(key, true);
    keys[i] = key;
    if (keepHigher && values[i] > value) return; 
    values[i] = value;
  }

  public void incr(T key, float dValue)
	{
    int i = find(key, true);
    keys[i] = key;
    if(Double.isNaN(values[i])) values[i] = dValue; // New value
    else values[i] += dValue;
  }
  public void scale(T key, float dValue)
	{
    int i = find(key, true);
    if(i == -1) return;
    values[i] *= dValue;
  }
  public int size() { return num; }
  public int capacity() { return keys.length; }
  /*public void clear() { // Keep the same capacity
    num = 0;
    for(int i = 0; i < keys.length; i++)
      keys[i] = null;
  }*/
  public void gut() { values = null; } // Save memory

  // Simple operations on values
  // Implement them here for maximum efficiency.
  public float sum()
	{
		float sum = 0;
    for(int i = 0; i < keys.length; i++)
      if(keys[i] != null)
        sum += values[i];
    return sum;
  }
  public void putAll(float value)
	{
    for(int i = 0; i < keys.length; i++)
      if(keys[i] != null)
        values[i] = value;
  }
  public void incrAll(float dValue)
	{
    for(int i = 0; i < keys.length; i++)
      if(keys[i] != null)
        values[i] += dValue;
  }
  public void multAll(float dValue)
	{
    for(int i = 0; i < keys.length; i++)
      if(keys[i] != null)
        values[i] *= dValue;
  }

  // Return the key with the maximum value
  public T argmax() {
    int besti = -1;
    for(int i = 0; i < keys.length; i++)
      if(keys[i] != null && (besti == -1 || values[i] > values[besti]))
        besti = i;
    return besti == -1 ? null : keys[besti];
  }

  // Return the maximum value
  public float max()
	{
	    int besti = -1;
	    for(int i = 0; i < keys.length; i++)
	      if(keys[i] != null && (besti == -1 || values[i] > values[besti]))
	        besti = i;
	    return besti == -1 ? Float.NEGATIVE_INFINITY : values[besti];
	  }

  // For each (key, value) in map, increment this's key by factor*value
  public void incrMap(TFloatMap map, float factor)
	{
    for(int i = 0; i < map.keys.length; i++)
      if(map.keys[i] != null)
        incr(map.keys[i], factor*map.values[i]);
  }

  // If keys are locked, we can share the same keys.
  public TFloatMap copy() {
    TFloatMap newMap = new TFloatMap(keyFunc);
    newMap.mapType = mapType;
    newMap.locked = locked;
    newMap.num = num;
    newMap.keys = locked ? keys : (T[])keys.clone(); // Share keys! CHECKED
    newMap.values = values.clone();
    return newMap;
  }
  // Return a map with only keys in the set
  public TFloatMap restrict(Set set) {
    TFloatMap newMap = new TFloatMap(keyFunc);
    newMap.mapType = mapType;
    if(mapType == MapType.SORTED_LIST) {
      allocate(getCapacity(num, false));
      for(int i = 0; i < keys.length; i++) {
        if(set.contains(keys[i])) {
          newMap.keys[newMap.num] = keys[i];
          newMap.values[newMap.num] = values[i];
          newMap.num++;
        }
      }
    }
    else if(mapType == MapType.HASH_TABLE) {
      for(int i = 0; i < keys.length; i++)
        if(keys[i] != null && set.contains(keys[i]))
          newMap.put(keys[i], values[i]);
    }
    newMap.locked = locked;
    return newMap;
  }

  // For sorting the entries.
  // Warning: this class has the overhead of the parent class
  private class FullEntry implements Comparable {
    private FullEntry(T key, float value)
		{
      this.key = key;
      this.value = value;
    }

    public int compareTo(FullEntry e) {
      int h1 = hash(key);
      int h2 = hash(e.key);
      if(h1 != h2) return h1-h2;
      return ((Comparable)key).compareTo(e.key);
    }

    private final T key;
    private final float value;
  }

  // Compare by value.
  public class EntryValueComparator implements Comparator {
    public int compare(Entry e1, Entry e2) {
      return Double.compare(values[e1.i], values[e2.i]);
    }
  }
  public EntryValueComparator entryValueComparator() { return new EntryValueComparator(); }

  // For iterating.
  public class Entry {
    private Entry(int i) { this.i = i; }

    public T getKey() { return keys[i]; }
    public float getValue()
		{
			return values[i];
		}

		public void setValue(float newValue)
		{
			values[i] = newValue;
		}

    private final int i;
  }

  public void lock() {
    locked = true;
  }
  public void switchToSortedList() {
    switchMapType(MapType.SORTED_LIST);
  }
  public void switchToHashTable() {
    switchMapType(MapType.HASH_TABLE);
  }

  //////////////////////////////////////////////////////////// 

  public class EntrySet extends AbstractSet {
    @Override
	public Iterator iterator() { return new EntryIterator(); }
    @Override
	public int size() { return num; }
    @Override
	public boolean contains(Object o) { throw new UnsupportedOperationException(); }
    @Override
	public boolean remove(Object o) { throw new UnsupportedOperationException(); }
    @Override
	public void clear() { throw new UnsupportedOperationException(); }
  }
  public class KeySet extends AbstractSet {
    @Override
	public Iterator iterator() { return new KeyIterator(); }
    @Override
	public int size() { return num; }
    @Override
	public boolean contains(Object o) { return containsKey((T)o); } // CHECKED
    @Override
	public boolean remove(Object o) { throw new UnsupportedOperationException(); }
    @Override
	public void clear() { throw new UnsupportedOperationException(); }
  }
  public class ValueCollection extends AbstractCollection
	{
    @Override
	public Iterator iterator()
		{
			return new ValueIterator();
		}
    @Override
	public int size() { return num; }
    @Override
	public boolean contains(Object o) { throw new UnsupportedOperationException(); }
    @Override
	public void clear() { throw new UnsupportedOperationException(); }
  }
  public EntryIterator iterator() { return new EntryIterator(); }
  public EntrySet entrySet() { return new EntrySet(); }
  public KeySet keySet() { return new KeySet(); }
  public ValueCollection values() { return new ValueCollection(); }

  // WARNING: no checks that this iterator is only used when
  // the map is not being structurally changed
  private class EntryIterator extends MapIterator {
    public Entry next() { return new Entry(nextIndex()); }
  }
  private class KeyIterator extends MapIterator {
    public T next() { return keys[nextIndex()]; }
  }
  private class ValueIterator extends MapIterator
	{
		public Float next()
		{
			return values[nextIndex()];
		}
  }
  private abstract class MapIterator implements Iterator {
    public MapIterator() {
      if(mapType == MapType.SORTED_LIST) end = size();
      else end = capacity();
      next = -1;
      nextIndex();
    }

    public boolean hasNext() { return next < end; }
    int nextIndex() {
      int curr = next;
      do { next++; } while(next < end && keys[next] == null);
      return curr;
    }
    public void remove() { throw new UnsupportedOperationException(); }

    private int next, end;
  }

  //////////////////////////////////////////////////////////// 

  /** How much capacity do we need for this type of map,
   * given that we want n elements.
   * compact: whether we want to save space and don't plan on growing.
   */
  private int getCapacity(int n, boolean compact) {
    int capacity;
    if(mapType == MapType.SORTED_LIST)
      capacity = compact ? n : n*growFactor;
    else if(mapType == MapType.HASH_TABLE) {
      capacity = n*growFactor+2; // Make sure there's enough room for n+2 more entries
    }
    else throw new RuntimeException("Internal bug");
    return Math.max(capacity, 1);
  }

  /**
   * Convert the map to the given type.
   */
  private void switchMapType(MapType newMapType) {
    assert !locked;

    //System.out.println("switchMapType(" + newMapType + ", " + compact + ")");

    // Save old keys and values, allocate space
    T[] oldKeys = keys;
    float[] oldValues = values;
    mapType = newMapType;
    allocate(getCapacity(num, true));
    numCollisions = 0;

    if(newMapType == MapType.SORTED_LIST) {
      // Sort the keys
      List entries = new ArrayList(num);
      for(int i = 0; i < oldKeys.length; i++)
        if(oldKeys[i] != null)
          entries.add(new FullEntry(oldKeys[i], oldValues[i]));
      Collections.sort(entries);

      // Populate the sorted list
      for(int i = 0; i < num; i++) {
        keys[i] = entries.get(i).key;
        values[i] = entries.get(i).value;
      }
    }
    else if(mapType == MapType.HASH_TABLE) {
      // Populate the hash table
      num = 0;
      for(int i = 0; i < oldKeys.length; i++) {
        if(oldKeys[i] != null)
          put(oldKeys[i], oldValues[i]);
      }
    }
  }

  /**
   * Return the first index i for which the target key is less than or equal to
   * key i (00001111).  Should insert target key at position i.
   * If target is larger than all of the elements, return size().
   */
  private int binarySearch(T targetKey) {
    int targetHash = hash(targetKey);
    int l = 0, u = num;
    while(l < u) {
      //System.out.println(l);
      int m = (l+u) >> 1;
      int keyHash = hash(keys[m]);
      if(targetHash < keyHash || (targetHash == keyHash && ((Comparable)targetKey).compareTo(keys[m]) <= 0))
        u = m;
      else
        l = m+1;
    }
    return l;
  }

  // Modified hash (taken from HashMap.java).
  private int hash(T x) {
    int h = x.hashCode();
    h += ~(h << 9);
    h ^=  (h >>> 14);
    h +=  (h << 4);
    h ^=  (h >>> 10);
    if(h < 0) h = -h; // New
    return h;
  }

  /**
   * Modify is whether to make room for the new key if it doesn't exist.
   * If a new entry is created, the value at that position will be Double.NaN.
   * Here's where all the magic happens.
   */
  private int find(T key, boolean modify) {
    //System.out.println("find " + key + " " + modify + " " + mapType + " " + capacity());

    if(mapType == MapType.SORTED_LIST) {
      // Binary search
      int i = binarySearch(key);
      if(i < num && keys[i] != null && key.equals(keys[i])) return i;
      if(modify) {
        if(locked)
          throw new RuntimeException("Cannot make new entry for " + key + ", because map is locked");

        if(num == capacity())
          changeSortedListCapacity(getCapacity(num+1, false));

        // Shift everything forward
        for(int j = num; j > i; j--) {
          keys[j] = keys[j-1];
          values[j] = values[j-1];
        }
        num++;
        values[i] = Float.NaN;
        return i;
      }
      else
        return -1;
    }
    else if(mapType == MapType.HASH_TABLE) {
      int capacity = capacity();
      int keyHash = hash(key);
      int i = keyHash % capacity;
      if(i < 0) i = -i; // Arbitrary transformation

      // Make sure big enough
      if(!locked && modify && (num > loadFactor*capacity || capacity <= num+1)) {
        /*if(locked)
          throw new RuntimeException("Cannot make new entry for " + key + ", because map is locked");*/

        switchMapType(MapType.HASH_TABLE);
        return find(key, modify);
      }

      //System.out.println("!!! " + keyHash + " " + capacity);
      if(num == capacity)
        throw new RuntimeException("Hash table is full: " + capacity);
      while(keys[i] != null && !keys[i].equals(key)) { // Collision
        // Warning: infinite loop if the hash table is full
        // (but this shouldn't happen based on the check above)
        i++;
        numCollisions++;
        if(i == capacity) i = 0;
      }
      if(keys[i] != null) { // Found
        assert key.equals(keys[i]);
        return i;
      }
      if(modify) { // Not found
        num++;
        values[i] = Float.NaN;
        return i;
      }
      else
        return -1;
    }
    else
      throw new RuntimeException("Internal bug: " + mapType);
  }

  private void allocate(int n) {
    keys = keyFunc.createArray(n);
    values = new float[n];
  }

  // Resize the sorted list to the new capacity.
  private void changeSortedListCapacity(int newCapacity) {
    assert mapType == MapType.SORTED_LIST;
    assert newCapacity >= num;
    T[] oldKeys = keys;
    float[] oldValues = values;
    allocate(newCapacity);
    System.arraycopy(oldKeys, 0, keys, 0, num);
    System.arraycopy(oldValues, 0, values, 0, num);
  }

  // Check consistency of data structure.
  private void repCheck() {
    assert capacity() > 0;
    if(mapType == MapType.SORTED_LIST) {
      assert num <= capacity();
      for(int i = 1; i < num; i++) { // Make sure keys are sorted.
        int h1 = hash(keys[i-1]);
        int h2 = hash(keys[i]);
        assert h1 <= h2;
        if(h1 == h2)
          assert ((Comparable)keys[i-1]).compareTo(keys[i]) < 0;
      }
    }
  }

  public void debugDump() {
    LogInfo.logsForce("--------------------");
    LogInfo.logsForce("mapType = " + mapType);
    LogInfo.logsForce("locked = " + locked);
    LogInfo.logsForce("size/capacity = " + size() + "/" + capacity());
    LogInfo.logsForce("numCollisions = " + numCollisions);
    /*for(int i = 0; i < keys.length; i++) {
      System.out.printf("[%d] %s (%d) => %f\n", i, keys[i], (keys[i] == null ? 0 : keys[i].hashCode()), values[i]);
    }*/
  }

  /**
   * Format: mapType, num, (key, value) pairs
   */
  private void writeObject(ObjectOutputStream out) throws IOException {
    out.writeObject(mapType);
    out.writeInt(num);
    for(Entry e : this) {
      out.writeObject(e.getKey());
      out.writeDouble(e.getValue());
    }
  }
  private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    this.mapType = (MapType)in.readObject();
    this.num = 0;
    this.locked = false;

    int n = in.readInt();
    allocate(getCapacity(n, true));

    for(int i = 0; i < n; i++) {
      T key = keyFunc.intern((T)in.readObject()); // CHECKED
      float value = in.readFloat();
      if(mapType == MapType.SORTED_LIST) {
        // Assume keys and values serialized in sorted order
        keys[num] = key;
        values[num] = value;
        num++;
      }
      else if(mapType == MapType.HASH_TABLE) {
        put(key, value);
      }
    }
  }

  // Construct a map from a list of key, value, key value arguments.
  public static  TFloatMap newMap(Object... args) {
    if(args.length % 2 != 0) throw Exceptions.bad;
    TFloatMap map = new TFloatMap();
    for(int i = 0; i < args.length; i += 2) {
      T key = (T)args[i];
      Object value = args[i+1];
      if (value instanceof Integer) value = (float) ((Integer) value);
      map.put((T) args[i], (Float) value);
    }
    return map;
  }

  @Override
public String toString(){
	  StringBuilder sb = new StringBuilder();
	  sb.append("[");
	  for (TFloatMap.Entry entry : entrySet()){
		  sb.append(entry.getKey() + ":" + entry.getValue()+", ");
	  }
	  sb.append("]");
	  return sb.toString();
  }
  private float[] values;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy