All downloads are free. The search and download functionality uses the official Maven repository.

edu.berkeley.nlp.util.FastCounterMap Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.util;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;


//import fig.basic.Pair;

/**
 * A countermap that uses fast counters.  This should most certainly be rewritten.
 * 
 * John
 */
public class FastCounterMap implements java.io.Serializable {
	private static final long serialVersionUID = 1L;
	Map> counterMap;
	boolean sortedList;

	protected FastCounter ensureCounter(K key) {
		FastCounter valueCounter = counterMap.get(key);
		if (valueCounter == null) {
			valueCounter = new FastCounter();
			if (sortedList) valueCounter.switchToSortedList();
			counterMap.put(key, valueCounter);
		}
		return valueCounter;
	}

	/**
	 * Returns the keys that have been inserted into this FastCounterMap.
	 */
	public Set keySet() {
		return counterMap.keySet();
	}

	/**
	 * Sets the count for a particular (key, value) pair.
	 */
	public void setCount(K key, V value, double count) {
		FastCounter valueCounter = ensureCounter(key);
		valueCounter.setCount(value, count);
	}

	//	public void setCount(Pair pair) {
	//		
	//	}

	/**
	 * Increments the count for a particular (key, value) pair.
	 */
	public void incrementCount(K key, V value, double count) {
		FastCounter valueCounter = ensureCounter(key);
		valueCounter.incrementCount(value, count);
	}

	/**
	 * Gets the count of the given (key, value) entry, or zero if that entry is
	 * not present.  Does not create any objects.
	 */
	public double getCount(K key, V value) {
		FastCounter valueCounter = counterMap.get(key);
		if (valueCounter == null) return 0.0;
		return valueCounter.getCount(value);
	}

	/**
	 * Gets the sub-counter for the given key.  If there is none, a counter is
	 * created for that key, and installed in the CounterMap.  You can, for
	 * example, add to the returned empty counter directly (though you shouldn't).
	 * This is so whether the key is present or not, modifying the returned
	 * counter has the same effect (but don't do it).
	 */
	public FastCounter getCounter(K key) {
		return ensureCounter(key);
	}

	public void incrementAll(Map map, double count) {
		for (Map.Entry entry : map.entrySet()) {
			incrementCount(entry.getKey(), entry.getValue(), count);
		}
	}

	public void incrementAll(FastCounterMap cMap) {
		for (K key : cMap.keySet()) {
			for (V value : cMap.getCounter(key).keySet()) {
				incrementCount(key, value, cMap.getCounter(key).getCount(value));
			}
		}
	}

	/**
	 * Gets the total count of the given key, or zero if that key is
	 * not present.  Does not create any objects.
	 */
	public double getCount(K key) {
		FastCounter valueCounter = counterMap.get(key);
		if (valueCounter == null) return 0.0;
		return valueCounter.totalCount();
	}

	/**
	 * Returns the total of all counts in sub-counters.  This implementation is
	 * linear; it recalculates the total each time.
	 */
	public double totalCount() {
		double total = 0.0;
		for (Map.Entry> entry : counterMap.entrySet()) {
			FastCounter counter = entry.getValue();
			total += counter.totalCount();
		}
		return total;
	}

	/**
	 * Returns the total number of (key, value) entries in the CounterMap (not
	 * their total counts).
	 */
	public int totalSize() {
		int total = 0;
		for (Map.Entry> entry : counterMap.entrySet()) {
			FastCounter counter = entry.getValue();
			total += counter.size();
		}
		return total;
	}

	/**
	 * The number of keys in this CounterMap (not the number of key-value entries
	 * -- use totalSize() for that)
	 */
	public int size() {
		return counterMap.size();
	}

	/**
	 * True if there are no entries in the CounterMap (false does not mean
	 * totalCount > 0)
	 */
	public boolean isEmpty() {
		return size() == 0;
	}

	/**
	 * Finds the key with maximum count.  This is a linear operation, and ties are broken arbitrarily.
	 *
	 * @return a key with minumum count
	 */
	public Pair argMax() {
		double maxCount = Double.NEGATIVE_INFINITY;
		Pair maxKey = null;
		for (Map.Entry> entry : counterMap.entrySet()) {
			FastCounter counter = entry.getValue();
			V localMax = counter.argMax();
			if (counter.getCount(localMax) > maxCount || maxKey == null) {
				maxKey = new Pair(entry.getKey(), localMax);
				maxCount = counter.getCount(localMax);
			}
		}
		return maxKey;
	}

	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder("[\n");
		for (Map.Entry> entry : counterMap.entrySet()) {
			sb.append("  ");
			sb.append(entry.getKey());
			sb.append(" -> ");
			sb.append(entry.getValue().toString(20));
			sb.append("\n");
		}
		sb.append("]");
		return sb.toString();
	}

	public FastCounterMap(FastCounterMap cm) {
		this();
		incrementAll(cm);
	}

	//	public boolean isEqualTo(FastCounterMap map) {
	//		boolean tmp = true;
	//		FastCounterMap bigger = map.size() > size() ? map : this;
	//		for (K k : bigger.keySet()) {
	//			tmp &= map.getCounter(k).isEqualTo(getCounter(k));
	//		}
	//		return tmp;
	//	}

	public FastCounterMap() {
		counterMap = new HashMap>();
	}

	public static void main(String[] args) {
		FastCounterMap bigramCounterMap = new FastCounterMap();
		bigramCounterMap.incrementCount("people", "run", 1);
		bigramCounterMap.incrementCount("cats", "growl", 2);
		bigramCounterMap.incrementCount("cats", "scamper", 3);
		System.out.println(bigramCounterMap);
		System.out.println("Entries for cats: " + bigramCounterMap.getCounter("cats"));
		System.out.println("Entries for dogs: " + bigramCounterMap.getCounter("dogs"));
		System.out.println("Count of cats scamper: "
				+ bigramCounterMap.getCount("cats", "scamper"));
		System.out.println("Count of snakes slither: "
				+ bigramCounterMap.getCount("snakes", "slither"));
		System.out.println("Total size: " + bigramCounterMap.totalSize());
		System.out.println("Total count: " + bigramCounterMap.totalCount());
		System.out.println(bigramCounterMap);
	}

	public void normalize() {
		for (K key : keySet()) {
			getCounter(key).normalize();
		}
	}

	public void normalizeWithDiscount(double discount) {
		for (K key : keySet()) {
			FastCounter ctr = getCounter(key);
			double totalCount = ctr.totalCount();
			for (V value : ctr.keySet()) {
				ctr.setCount(value, (ctr.getCount(value) - discount) / totalCount);
			}
		}
	}

	/**
	 * Constructs reverse CounterMap where the count of a pair (k,v)
	 * is the count of (v,k) in the current CounterMap
	 * @return
	 */
	public FastCounterMap invert() {
		FastCounterMap invertCounterMap = new FastCounterMap();
		for (K key : this.keySet()) {
			FastCounter keyCounts = this.getCounter(key);
			for (V val : keyCounts.keySet()) {
				double count = keyCounts.getCount(val);
				invertCounterMap.setCount(val, key, count);
			}
		}
		return invertCounterMap;
	}

	public boolean containsKey(K key) {
		return counterMap.containsKey(key);
	}

	public Iterator> getPairIterator() {

		class PairIterator implements Iterator> {

			Iterator outerIt;
			Iterator innerIt;
			K curKey;

			public PairIterator() {
				outerIt = keySet().iterator();
			}

			private boolean advance() {
				if (innerIt == null || !innerIt.hasNext()) {
					if (!outerIt.hasNext()) {
						return false;
					}
					curKey = outerIt.next();
					innerIt = getCounter(curKey).keySet().iterator();
				}
				return true;
			}

			public boolean hasNext() {
				return advance();
			}

			public Pair next() {
				advance();
				assert curKey != null;
				return Pair.newPair(curKey, innerIt.next());
			}

			public void remove() {
			// TODO Auto-generated method stub

			}

		};
		return new PairIterator();
	}

	public Set>> getEntrySet() {
		// TODO Auto-generated method stub
		return counterMap.entrySet();
	}

	public void removeKey(K oldIndex) {
		counterMap.remove(oldIndex);

	}

	public void setCounter(K newIndex, FastCounter counter) {
		counterMap.put(newIndex, counter);

	}

	public void setSortedList(boolean sortedList) {
		this.sortedList = sortedList;
		for (Map.Entry> entry : getEntrySet()) {
			FastCounter ctr = entry.getValue();
			if (sortedList) {
				ctr.switchToSortedList();
			} else {
				ctr.switchToHashTable();
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy