// edu.stanford.nlp.stats.ClassicCounter Maven / Gradle / Ivy
// Stanford JavaNLP support classes
// Copyright (c) 2001-2008 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// [email protected]
// http://nlp.stanford.edu/software/
package edu.stanford.nlp.stats;
import java.io.Serializable;
import java.util.AbstractCollection;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.util.Factory;
import edu.stanford.nlp.util.MapFactory;
import edu.stanford.nlp.util.MutableDouble;
import edu.stanford.nlp.util.logging.PrettyLogger;
import edu.stanford.nlp.util.logging.Redwood.RedwoodChannels;
/**
 * A specialized kind of hash table (or map) for storing numeric counts for
 * objects. It works like a Map,
 * but with different methods for easily getting/setting/incrementing counts
 * for objects and computing various functions with the counts.
 * The Counter constructor
 * and {@code addAll} method can be used to copy another Counter's contents
 * over.
 * <p>
 * <i>Implementation notes:</i>
 * You shouldn't casually add further methods to
 * this interface. Rather, they should be added to the {@link Counters} class.
 * Note that this class stores a
 * {@code totalCount} field as well as the map. This makes certain
 * operations much more efficient, but means that any methods that change the
 * map must also update {@code totalCount} appropriately. If you use the
 * {@code setCount} method, then you cannot go wrong.
 * This class is not threadsafe: If multiple threads are accessing the same
 * counter, then access should be synchronized externally to this class.
 *
 * @author Dan Klein ([email protected])
 * @author Joseph Smarr ([email protected])
 * @author Teg Grenager
 * @author Galen Andrew
 * @author Christopher Manning
 * @author Kayur Patel (kdpatel@cs)
 */
public class ClassicCounter implements Serializable, Counter, Iterable {
Map map; // accessed by DeltaCounter
private final MapFactory mapFactory;
private double totalCount; // = 0.0
private double defaultValue; // = 0.0;
private static final long serialVersionUID = 4L;
// for more efficient speed/memory usage
private transient MutableDouble tempMDouble; // = null;
// CONSTRUCTORS
/**
* Constructs a new (empty) Counter backed by a HashMap.
*/
public ClassicCounter() {
this(MapFactory.hashMapFactory());
}
public ClassicCounter(int initialCapacity) {
this(MapFactory.hashMapFactory(), initialCapacity);
}
/**
* Pass in a MapFactory and the map it vends will back your Counter.
*
* @param mapFactory The Map this factory vends will back your Counter.
*/
public ClassicCounter(MapFactory mapFactory) {
this.mapFactory = mapFactory;
this.map = mapFactory.newMap();
}
/**
* Pass in a MapFactory and the map it vends will back your Counter.
*
* @param mapFactory The Map this factory vends will back your Counter.
* @param initialCapacity initial capacity of the counter
*/
public ClassicCounter(MapFactory mapFactory, int initialCapacity) {
this.mapFactory = mapFactory;
this.map = mapFactory.newMap(initialCapacity);
}
/**
* Constructs a new Counter with the contents of the given Counter.
* Implementation note: A new Counter is allocated with its
* own counts, but keys will be shared and should be an immutable class.
*
* @param c The Counter which will be copied.
*/
public ClassicCounter(Counter c) {
this();
Counters.addInPlace(this, c);
setDefaultReturnValue(c.defaultReturnValue());
}
/**
* Constructs a new Counter by counting the elements in the given Collection.
* The Counter is backed by a HashMap.
*
* @param collection Each item in the Collection is made a key in the
* Counter with count being its multiplicity in the Collection.
*/
public ClassicCounter(Collection collection) {
this();
for (E key : collection) {
incrementCount(key);
}
}
public static ClassicCounter identityHashMapCounter() {
return new ClassicCounter<>(MapFactory.identityHashMapFactory());
}
// STANDARD ACCESS MODIFICATION METHODS
/** Get the MapFactory for this Counter.
* This method is needed by the DeltaCounter implementation.
*
* @return The MapFactory
*/
MapFactory getMapFactory() {
return mapFactory;
}
// METHODS NEEDED BY THE Counter INTERFACE
/** {@inheritDoc} */
@Override
public Factory> getFactory() {
return new ClassicCounterFactory<>(getMapFactory());
}
private static class ClassicCounterFactory implements Factory> {
private static final long serialVersionUID = 1L;
private final MapFactory mf;
private ClassicCounterFactory(MapFactory mf) {
this.mf = mf;
}
@Override
public Counter create() {
return new ClassicCounter<>(mf);
}
}
/** {@inheritDoc} */
@Override
public final void setDefaultReturnValue(double rv) { defaultValue = rv; }
/** {@inheritDoc} */
@Override
public double defaultReturnValue() { return defaultValue; }
/** {@inheritDoc} */
@Override
public double getCount(Object key) {
Number count = map.get(key);
if (count == null) {
return defaultValue; // haven't seen this object before -> default count
}
return count.doubleValue();
}
/** {@inheritDoc} */
@Override
public void setCount(E key, double count) {
if (tempMDouble == null) {
//System.out.println("creating mdouble");
tempMDouble = new MutableDouble();
}
//System.out.println("setting mdouble");
tempMDouble.set(count);
//System.out.println("putting mdouble in map");
tempMDouble = map.put(key, tempMDouble);
//System.out.println("placed mDouble in map");
totalCount += count;
if (tempMDouble != null) {
totalCount -= tempMDouble.doubleValue();
}
}
/** {@inheritDoc} */
@Override
public double incrementCount(E key, double count) {
if (tempMDouble == null) {
tempMDouble = new MutableDouble();
}
MutableDouble oldMDouble = map.put(key, tempMDouble);
totalCount += count;
if (oldMDouble != null) {
count += oldMDouble.doubleValue();
}
tempMDouble.set(count);
tempMDouble = oldMDouble;
return count;
}
/** {@inheritDoc} */
@Override
public final double incrementCount(E key) {
return incrementCount(key, 1.0);
}
/** {@inheritDoc} */
@Override
public double decrementCount(E key, double count) {
return incrementCount(key, -count);
}
/** {@inheritDoc} */
@Override
public double decrementCount(E key) {
return incrementCount(key, -1.0);
}
/** {@inheritDoc} */
@Override
public double logIncrementCount(E key, double count) {
if (tempMDouble == null) {
tempMDouble = new MutableDouble();
}
MutableDouble oldMDouble = map.put(key, tempMDouble);
if (oldMDouble != null) {
count = SloppyMath.logAdd(count, oldMDouble.doubleValue());
totalCount += count - oldMDouble.doubleValue();
} else {
totalCount += count;
}
tempMDouble.set(count);
tempMDouble = oldMDouble;
return count;
}
/** {@inheritDoc} */
@Override
public void addAll(Counter counter) {
Counters.addInPlace(this, counter);
}
/** {@inheritDoc} */
@Override
public double remove(E key) {
MutableDouble d = mutableRemove(key); // this also updates totalCount
if(d != null) {
return d.doubleValue();
}
return defaultValue;
}
/** {@inheritDoc} */
@Override
public boolean containsKey(E key) {
return map.containsKey(key);
}
/** {@inheritDoc} */
@Override
public Set keySet() {
return map.keySet();
}
/** {@inheritDoc} */
@Override
public Collection values() {
return new AbstractCollection() {
@Override
public Iterator iterator() {
return new Iterator() {
Iterator inner = map.values().iterator();
@Override
public boolean hasNext() {
return inner.hasNext();
}
@Override
public Double next() {
// copy so as to give safety to mutable internal representation
return Double.valueOf(inner.next().doubleValue());
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public int size() {
return map.size();
}
@Override
public boolean contains(Object v) {
return v instanceof Double && map.values().contains(new MutableDouble((Double) v));
}
};
}
/** {@inheritDoc} */
@Override
public Set> entrySet() {
return new AbstractSet>() {
@Override
public Iterator> iterator() {
return new Iterator>() {
final Iterator> inner = map.entrySet().iterator();
@Override
public boolean hasNext() {
return inner.hasNext();
}
@Override
public Entry next() {
return new Entry() {
final Entry e = inner.next();
public double getDoubleValue() {
return e.getValue().doubleValue();
}
public double setValue(double value) {
final double old = e.getValue().doubleValue();
e.getValue().set(value);
totalCount = totalCount - old + value;
return old;
}
@Override
public E getKey() {
return e.getKey();
}
@Override
public Double getValue() {
return getDoubleValue();
}
@Override
public Double setValue(Double value) {
return setValue(value.doubleValue());
}
};
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public int size() {
return map.size();
}
};
}
/** {@inheritDoc} */
@Override
public void clear() {
map.clear();
totalCount = 0.0;
}
/** {@inheritDoc} */
@Override
public int size() {
return map.size();
}
/** {@inheritDoc} */
@Override
public double totalCount() {
return totalCount;
}
// ADDITIONAL MAP LIKE OPERATIONS (NOT IN Counter INTERFACE)
// THEIR USE IS DISCOURAGED, BUT THEY HAVEN'T (YET) BEEN REMOVED.
/** This is a shorthand for keySet.iterator(). It's not really clear that
* this method should be here, as the Map interface has no such shortcut,
* but it's used in a number of places, and I've left it in for now.
* Use is discouraged.
*
* @return An Iterator over the keys in the Counter.
*/
@Override
public Iterator iterator() {
return keySet().iterator();
}
/** This is used internally to the class for getting back a
* MutableDouble in a remove operation. Not for public use.
*
* @param key The key to remove
* @return Its value as a MutableDouble
*/
private MutableDouble mutableRemove(E key) {
MutableDouble md = map.remove(key);
if (md != null) {
totalCount -= md.doubleValue();
}
return md;
}
/**
* Removes all the given keys from this Counter.
* Keys may be included that are not actually in the
* Counter - no action is taken in response to those
* keys. This behavior should be retained in future
* revisions of Counter (matches HashMap).
*
* @param keys The keys to remove from the Counter. Their values are
* subtracted from the total count mass of the Counter.
*/
public void removeAll(Collection keys) {
for (E key : keys) {
mutableRemove(key);
}
}
/** Returns whether a Counter has no keys in it.
*
* @return true iff a Counter has no keys in it.
*/
public boolean isEmpty() {
return size() == 0;
}
// OBJECT STUFF
// NOTE: Using @inheritdoc to get back to Object's javadoc doesn't work
// on a class that implements an interface in 1.6. Weird, but there you go.
/** Equality is defined over all Counter implementations.
* Two Counters are equal if they have the same keys explicitly stored
* with the same values.
*
* Note that a Counter with a key with value defaultReturnValue will not
* be judged equal to a Counter that is lacking that key. In order for
* two Counters to be correctly judged equal in such cases, you should
* call Counters.retainNonDefaultValues() on both Counters first.
*
* @param o Object to compare for equality
* @return Whether this is equal to o
*/
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
if (this == o) {
return true;
} else if ( ! (o instanceof Counter)) {
return false;
} else if ( ! (o instanceof ClassicCounter)) {
return Counters.equals(this, (Counter) o);
}
final ClassicCounter counter = (ClassicCounter) o;
return totalCount == counter.totalCount && map.equals(counter.map);
}
/** Returns a hashCode which is the underlying Map's hashCode.
*
* @return A hashCode.
*/
@Override
public int hashCode() {
return map.hashCode();
}
/** Returns a String representation of the Counter, as formatted by
* the underlying Map.
*
* @return A String representation of the Counter.
*/
@Override
public String toString() {
return map.toString();
}
// EXTRA I/O METHODS
/**
* Returns the Counter over Strings specified by this String.
* The String is often the whole contents of a file.
* The file can include comments if each line of comment starts with
* a hash (#) symbol, and does not contain any TAB characters.
* Otherwise, the format is one entry per line. Each line must contain
* precisely one tab separating a key and a value, giving a format of:
*
* StringKey\tdoubleValue\n
*
*
* @param s String representation of a Counter, where entries are one per
* line such that each line is either a comment (begins with #)
* or key \t value
* @return The Counter with String keys
*/
public static ClassicCounter valueOfIgnoreComments(String s) {
ClassicCounter result = new ClassicCounter<>();
String[] lines = s.split("\n");
for (String line : lines) {
String[] fields = line.split("\t");
if (fields.length != 2) {
if (line.startsWith("#")) {
continue;
} else {
throw new RuntimeException("Got unsplittable line: \"" + line + '\"');
}
}
result.setCount(fields[0], Double.parseDouble(fields[1]));
}
return result;
}
/**
* Converts from the format printed by the toString method back into
* a Counter<String>. The toString() doesn't escape, so this only
* works providing the keys of the Counter do not have commas or equals signs
* in them.
*
* @param s A String representation of a Counter
* @return The Counter
*/
public static ClassicCounter fromString(String s) {
ClassicCounter result = new ClassicCounter<>();
if (!s.startsWith("{") || !s.endsWith("}")) {
throw new RuntimeException("invalid format: ||"+s+"||");
}
s = s.substring(1, s.length()-1);
String[] lines = s.split(", ");
for (String line : lines) {
String[] fields = line.split("=");
if (fields.length!=2) throw new RuntimeException("Got unsplittable line: \"" + line + '\"');
result.setCount(fields[0], Double.parseDouble(fields[1]));
}
return result;
}
/**
* {@inheritDoc}
*/
@Override
public void prettyLog(RedwoodChannels channels, String description) {
PrettyLogger.log(channels, description, Counters.asMap(this));
}
}