edu.stanford.nlp.stats.Counters Maven / Gradle / Ivy
Show all versions of stanford-parser Show documentation
// Stanford JavaNLP support classes
// Copyright (c) 2004-2008 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// [email protected]
// http://nlp.stanford.edu/software/
package edu.stanford.nlp.stats;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.lang.reflect.Constructor;
import java.text.NumberFormat;
import java.util.AbstractCollection;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.util.BinaryHeapPriorityQueue;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.ErasureUtils;
import edu.stanford.nlp.util.Factory;
import edu.stanford.nlp.util.FixedPrioritiesPriorityQueue;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PriorityQueue;
import edu.stanford.nlp.util.Sets;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.PrettyLogger;
import edu.stanford.nlp.util.logging.Redwood.RedwoodChannels;
/**
* Static methods for operating on a {@link Counter}.
*
* Methods with "InPlace" in their name change their first argument (only).
* Some other helpers (for example the retain*, remove* and normalize methods)
* also modify the counter they are given. This class also provides access to
* Comparators that can be used to sort the keys or entries of a Counter by
* the counts, in either ascending or descending order.
*
* @author Galen Andrew ([email protected])
* @author Jeff Michels ([email protected])
* @author dramage
* @author daniel cer (http://dmcer.net)
* @author Christopher Manning
* @author stefank (Optimized dot product)
*/
public class Counters {
// Natural logarithm of 2, computed once when the class is initialized.
// Its uses are not visible in this part of the file.
private static final double LOG_E_2 = Math.log(2.0);
// Private constructor prevents instantiation from outside the class.
private Counters() {} // only static methods
//
// Log arithmetic operations
//
/**
 * Computes the log-sum of the values stored in a counter by handing the
 * unboxed values to {@code ArrayMath.logSum}.
 *
 * @param c Argument counter (which is not modified)
 * @return {@code ArrayMath.logSum} of the values in {@code c}
 */
public static double logSum(Counter c) {
  double[] unboxedValues = ArrayMath.unbox(c.values());
  return ArrayMath.logSum(unboxedValues);
}
/**
 * Turns log-space values into a log-space probability distribution, in place.
 * Assuming the values of the Counter are logarithms, the method computes
 * {@link #logSum(Counter)} once and subtracts it from every value. For keys
 * c1, c2, c3 with values v1, v2, v3, the value of c1 becomes
 * v1 - log(e^v1 + e^v2 + e^v3); afterwards e^v1 + e^v2 + e^v3 = 1.0, so
 * Counters.logSum(c) = 0.0 (approximately).
 *
 * @param c The Counter to log normalize in place
 */
@SuppressWarnings( { "UnnecessaryUnboxing" })
public static void logNormalizeInPlace(Counter c) {
  final double logTotal = logSum(c);
  // Writing through the entry (rather than incrementCount per key) is
  // expected to be faster.
  for (Map.Entry entry : c.entrySet()) {
    double shifted = entry.getValue().doubleValue() - logTotal;
    entry.setValue(shifted);
  }
}
//
// Query operations
//
/**
 * Returns the largest value stored in the counter (the L_infinity norm).
 * An empty counter yields Double.NEGATIVE_INFINITY.
 *
 * @param c The Counter to find the max of
 * @return The maximum value of the Counter
 */
public static double max(Counter c) {
  // note[gabor]: Should the default actually be 0 rather than negative_infinity?
  final double resultForEmpty = Double.NEGATIVE_INFINITY;
  return max(c, resultForEmpty);
}
/**
 * Returns the largest value stored in the counter (the L_infinity norm),
 * or a caller-supplied fallback when the counter holds no entries.
 *
 * @param c The Counter to find the max of
 * @param valueIfEmpty The value to return if this counter is empty (i.e., the
 *          maximum is not well defined)
 * @return The maximum value of the Counter, or {@code valueIfEmpty} when
 *         {@code c.size() == 0}
 */
public static double max(Counter c, double valueIfEmpty) {
  if (c.size() == 0) {
    return valueIfEmpty;
  }
  double largest = Double.NEGATIVE_INFINITY;
  for (double value : c.values()) {
    largest = Math.max(largest, value);
  }
  return largest;
}
/**
 * Builds a new ClassicCounter from a collection, calling
 * {@code incrementCount} once per element (so repeated elements are
 * incremented repeatedly).
 *
 * @param c The Collection to turn into a counter
 * @return The counter made out of the collection
 */
public static Counter asCounter(Collection c) {
  Counter tally = new ClassicCounter();
  for (E item : c) {
    tally.incrementCount(item);
  }
  return tally;
}
/**
 * Returns the smallest value stored in the counter. When the counter has no
 * values the loop never runs and Double.POSITIVE_INFINITY is returned.
 *
 * @param c The Counter (not modified)
 * @return The minimum value in the Counter
 */
public static double min(Counter c) {
  double smallest = Double.POSITIVE_INFINITY;
  for (double value : c.values()) {
    smallest = Math.min(smallest, value);
  }
  return smallest;
}
/**
 * Finds the key with the largest count, returning null when the counter is
 * empty. Delegates to the three-argument overload with a tie breaker that
 * always reports equality, so among tied keys the first one encountered
 * while iterating the key set is kept.
 *
 * @param c The Counter
 * @return The key in the Counter with the largest count.
 */
public static E argmax(Counter c) {
  return argmax(c, (first, second) -> 0, null);
}
/**
 * Finds the key with the smallest count. Returns null when the counter has
 * no keys. A later key replaces the current choice only when its count is
 * strictly smaller, so ties keep the first key encountered.
 *
 * @param c The Counter
 * @return The key in the Counter with the smallest count.
 */
public static E argmin(Counter c) {
  E bestKey = null;
  double lowest = Double.POSITIVE_INFINITY;
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    boolean nothingChosenYet = (bestKey == null);
    if (nothingChosenYet || value < lowest) {
      bestKey = candidate;
      lowest = value;
    }
  }
  return bestKey;
}
/**
 * Finds the key with the largest count, using a comparator to choose between
 * keys whose counts are equal. Returns null when the counter is empty.
 *
 * @param c The Counter
 * @param tieBreaker the tie breaker for when elements have the same value.
 * @return The key in the Counter with the largest count.
 */
public static E argmax(Counter c, Comparator tieBreaker) {
  E resultForEmpty = null;
  return argmax(c, tieBreaker, resultForEmpty);
}
/**
 * Finds the key with the largest count. When two keys have the same count,
 * the later one wins only if {@code tieBreaker.compare(later, current)} is
 * negative.
 *
 * @param c The Counter
 * @param tieBreaker the tie breaker for when elements have the same value.
 * @param defaultIfEmpty The value to return if the counter is empty.
 * @return The key in the Counter with the largest count, or
 *         {@code defaultIfEmpty} when {@code c.size() == 0}.
 */
public static E argmax(Counter c, Comparator tieBreaker, E defaultIfEmpty) {
  if (c.size() == 0) {
    return defaultIfEmpty;
  }
  E bestKey = null;
  double highest = Double.NEGATIVE_INFINITY;
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    boolean takeCandidate;
    if (bestKey == null || value > highest) {
      takeCandidate = true;
    } else {
      // The tie breaker is consulted only for exactly equal counts.
      takeCandidate = (value == highest && tieBreaker.compare(candidate, bestKey) < 0);
    }
    if (takeCandidate) {
      highest = value;
      bestKey = candidate;
    }
  }
  return bestKey;
}
/**
 * Finds the key with the smallest count, using a comparator to choose between
 * keys whose counts are equal: the later key wins only if
 * {@code tieBreaker.compare(later, current)} is negative. Returns null when
 * the counter has no keys.
 *
 * @param c The Counter
 * @param tieBreaker the tie breaker for when elements have the same value.
 * @return The key in the Counter with the smallest count.
 */
public static E argmin(Counter c, Comparator tieBreaker) {
  E bestKey = null;
  double lowest = Double.POSITIVE_INFINITY;
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    boolean takeCandidate;
    if (bestKey == null || value < lowest) {
      takeCandidate = true;
    } else {
      // The tie breaker is consulted only for exactly equal counts.
      takeCandidate = (value == lowest && tieBreaker.compare(candidate, bestKey) < 0);
    }
    if (takeCandidate) {
      lowest = value;
      bestKey = candidate;
    }
  }
  return bestKey;
}
/**
 * Returns the mean of all the counts (totalCount/size). No guard is applied
 * for an empty counter, where the divisor is zero.
 *
 * @param c The Counter to find the mean of.
 * @return The mean of all the counts (totalCount/size).
 */
public static double mean(Counter c) {
  double total = c.totalCount();
  return total / c.size();
}
/**
 * Returns the standard deviation of the counts, computed as the square root
 * of the mean squared deviation from totalCount/size (the divisor is size,
 * not size - 1). No guard is applied for an empty counter.
 *
 * @param c The Counter whose values are examined (not modified)
 * @return The standard deviation of the values in {@code c}
 */
public static double standardDeviation(Counter c) {
  double mean = c.totalCount() / c.size();
  double squaredDeviations = 0;
  for (Map.Entry entry : c.entrySet()) {
    double deviation = entry.getValue() - mean;
    squaredDeviations += deviation * deviation;
  }
  return Math.sqrt(squaredDeviations / c.size());
}
//
// In-place arithmetic
//
/**
 * Sets each value of target to be target[k]+scale*arg[k] for all keys k in
 * arg. Every entry of arg is applied, including entries whose scaled value
 * is zero.
 *
 * @param target A Counter that is modified
 * @param arg The Counter whose contents are added to target
 * @param scale How the arg Counter is scaled before being added
 */
public static void addInPlace(Counter target, Counter arg, double scale) {
  // Walk the entries directly so each key needs no second lookup in arg.
  for (Map.Entry entry : arg.entrySet()) {
    target.incrementCount(entry.getKey(), scale * entry.getValue());
  }
}
/**
 * Sets each value of target to be target[k]+arg[k] for all keys k in arg.
 * Entries of arg whose value equals zero are skipped, so they are never
 * passed to {@code incrementCount}.
 *
 * @param target A Counter that is modified
 * @param arg The Counter whose contents are added to target
 */
public static void addInPlace(Counter target, Counter arg) {
  for (Map.Entry pair : arg.entrySet()) {
    double amount = pair.getValue();
    if (amount == 0) {
      continue;
    }
    target.incrementCount(pair.getKey(), amount);
  }
}
/**
 * Sets each value of double[] target to be
 * target[idx.indexOf(k)]+arg.getCount(k) for all keys k in arg.
 * The position returned by {@code idx.indexOf} is used as-is; no check is
 * made here that it is a valid position in {@code target}.
 *
 * @param target The array that is modified
 * @param arg The Counter whose values are added
 * @param idx Maps each key of arg to a position in target
 */
public static void addInPlace(double[] target, Counter arg, Index idx) {
  for (Map.Entry pair : arg.entrySet()) {
    int position = idx.indexOf(pair.getKey());
    target[position] += pair.getValue();
  }
}
/**
 * For all keys (u,v) in arg1 and arg2, sets return[u,v] to be the sum of
 * both. A new TwoDimensionalCounter is created and both arguments are added
 * into it with addInPlace.
 *
 * @param arg1 The first counter to add
 * @param arg2 The second counter to add
 * @return A new counter holding the combined counts
 */
public static TwoDimensionalCounter add(TwoDimensionalCounter arg1, TwoDimensionalCounter arg2) {
  TwoDimensionalCounter sum = new TwoDimensionalCounter();
  addInPlace(sum, arg1);
  addInPlace(sum, arg2);
  return sum;
}
/**
 * For all keys (u,v) in arg, sets target[u,v] to be target[u,v] + scale *
 * arg[u,v]. The loops visit every combination of a key from
 * {@code arg.firstKeySet()} with a key from {@code arg.secondKeySet()}, and
 * {@code incrementCount} is called for each such combination.
 *
 * @param target The counter that is modified
 * @param arg The counter whose scaled contents are added to target
 * @param scale The factor applied to each count of arg before adding
 */
public static void addInPlace(TwoDimensionalCounter target, TwoDimensionalCounter arg, double scale) {
  for (T1 rowKey : arg.firstKeySet()) {
    for (T2 columnKey : arg.secondKeySet()) {
      double scaled = scale * arg.getCount(rowKey, columnKey);
      target.incrementCount(rowKey, columnKey, scaled);
    }
  }
}
/**
 * For all keys (u,v) in arg, sets target[u,v] to be target[u,v] + arg[u,v].
 * The loops visit every combination of a key from {@code arg.firstKeySet()}
 * with a key from {@code arg.secondKeySet()}, and {@code incrementCount} is
 * called for each such combination.
 *
 * @param target The counter that is modified
 * @param arg The counter whose contents are added to target
 */
public static void addInPlace(TwoDimensionalCounter target, TwoDimensionalCounter arg) {
  for (T1 rowKey : arg.firstKeySet()) {
    for (T2 columnKey : arg.secondKeySet()) {
      double amount = arg.getCount(rowKey, columnKey);
      target.incrementCount(rowKey, columnKey, amount);
    }
  }
}
/**
 * Increments target[k] by value once for every occurrence of k in the arg
 * collection, i.e. by value*(number-of-times-k-occurs-in-arg).
 *
 * @param target The counter that is modified
 * @param arg The keys to increment; repeated keys are incremented repeatedly
 * @param value The amount added per occurrence
 */
public static void addInPlace(Counter target, Collection arg, double value) {
  for (E occurrence : arg) {
    target.incrementCount(occurrence, value);
  }
}
/**
 * For all keys (u,v) in target, sets target[u,v] to be target[u,v] + value.
 * Each counter obtained from {@code target.getCounter(u)} is passed to the
 * addInPlace overload that takes a counter and a value.
 *
 * @param target The counter that is modified
 * @param value The amount added to every entry
 */
public static void addInPlace(TwoDimensionalCounter target, double value) {
  for (T1 rowKey : target.firstKeySet()) {
    addInPlace(target.getCounter(rowKey), value);
  }
}
/**
 * Increments target[k] by 1 for every occurrence of k in the arg collection,
 * i.e. by the number of times k occurs in arg.
 *
 * @param target The counter that is modified
 * @param arg The keys to increment; repeated keys are incremented repeatedly
 */
public static void addInPlace(Counter target, Collection arg) {
  for (E occurrence : arg) {
    target.incrementCount(occurrence, 1);
  }
}
/**
 * Increments the count of every key currently in the Counter by a specific
 * value.
 *
 * @param target The counter that is modified
 * @param value The amount added to each existing key
 */
public static void addInPlace(Counter target, double value) {
  for (E existingKey : target.keySet()) {
    target.incrementCount(existingKey, value);
  }
}
/**
 * Sets each value of target to be target[k]-arg[k] for all keys k in arg.
 *
 * @param target The counter that is modified
 * @param arg The counter whose counts are subtracted from target
 */
public static void subtractInPlace(Counter target, Counter arg) {
  for (E subtractedKey : arg.keySet()) {
    double amount = arg.getCount(subtractedKey);
    target.decrementCount(subtractedKey, amount);
  }
}
/**
 * Sets each value of double[] target to be
 * target[idx.indexOf(k)]-arg.getCount(k) for all keys k in arg.
 * The position returned by {@code idx.indexOf} is used as-is; no check is
 * made here that it is a valid position in {@code target}.
 *
 * @param target The array that is modified
 * @param arg The Counter whose values are subtracted
 * @param idx Maps each key of arg to a position in target
 */
public static void subtractInPlace(double[] target, Counter arg, Index idx) {
  for (Map.Entry pair : arg.entrySet()) {
    int position = idx.indexOf(pair.getKey());
    target[position] -= pair.getValue();
  }
}
/**
 * Divides every count in target by the count the denominator Counter reports
 * for the same key. Beware that this can give NaN values for zero counts
 * in the denominator counter!
 *
 * @param target The counter that is modified
 * @param denominator The counter supplying the divisor for each key
 */
public static void divideInPlace(Counter target, Counter denominator) {
  for (E key : target.keySet()) {
    double dividend = target.getCount(key);
    double divisorForKey = denominator.getCount(key);
    target.setCount(key, dividend / divisorForKey);
  }
}
/**
 * Multiplies every count in target by the count the term Counter reports
 * for the same key. Only keys of target are visited.
 *
 * @param target The counter that is modified
 * @param term The counter supplying the factor for each key
 */
public static void dotProductInPlace(Counter target, Counter term) {
  for (E key : target.keySet()) {
    double current = target.getCount(key);
    double factor = term.getCount(key);
    target.setCount(key, current * factor);
  }
}
/**
 * Divides each value in target by the given divisor, in place.
 *
 * @param target The values in this Counter will be divided by the divisor
 * @param divisor The number by which to divide each number in the Counter
 * @return The target Counter is returned (for easier method chaining)
 */
public static Counter divideInPlace(Counter target, double divisor) {
  for (Entry entry : target.entrySet()) {
    double quotient = entry.getValue() / divisor;
    target.setCount(entry.getKey(), quotient);
  }
  return target;
}
/**
 * Multiplies each value in target by the given multiplier, in place.
 *
 * @param target The values in this Counter will be multiplied by the
 *          multiplier
 * @param multiplier The number by which to change each number in the Counter
 * @return The target Counter is returned (for easier method chaining)
 */
public static Counter multiplyInPlace(Counter target, double multiplier) {
  for (Entry entry : target.entrySet()) {
    double product = entry.getValue() * multiplier;
    target.setCount(entry.getKey(), product);
  }
  return target;
}
/**
 * Multiplies each value in target by the count of the same key in mult, in
 * place. Afterwards {@code retainNonZeros} is applied to target, so only
 * non-zero entries remain.
 *
 * @param target The counter
 * @param mult The counter you want to multiply with target
 * @return The target Counter, now holding only its non-zero products
 */
public static Counter multiplyInPlace(Counter target, Counter mult) {
  for (Entry entry : target.entrySet()) {
    double factor = mult.getCount(entry.getKey());
    target.setCount(entry.getKey(), entry.getValue() * factor);
  }
  retainNonZeros(target);
  return target;
}
/**
 * Normalizes the target counter in-place by dividing every value by the
 * counter's total count, so the sum of the resulting values equals 1.
 *
 * @param target The counter to normalize; it is modified
 */
public static void normalize(Counter target) {
  double total = target.totalCount();
  divideInPlace(target, total);
}
/**
 * L1 normalize a counter. Return a counter that is a probability distribution,
 * so the sum of the resulting value equals 1. The result comes from
 * {@code scale} with the reciprocal of the total count as the factor.
 *
 * @param c The {@link Counter} to be L1 normalized. This counter is not
 *          modified.
 * @return A new L1-normalized Counter based on c.
 */
public static > C asNormalizedCounter(C c) {
  double reciprocalOfTotal = 1.0 / c.totalCount();
  return scale(c, reciprocalOfTotal);
}
/**
 * Normalizes the target counter in-place by dividing it by its total count,
 * so the sum of the resulting values equals 1.
 *
 * @param target The two-dimensional counter to normalize; it is modified
 */
public static void normalize(TwoDimensionalCounter target) {
  double total = target.totalCount();
  Counters.divideInPlace(target, total);
}
/**
 * Replaces every count in target with its natural logarithm
 * ({@code Math.log}), in place.
 *
 * @param target The counter whose values are replaced
 */
public static void logInPlace(Counter target) {
  for (E key : target.keySet()) {
    double original = target.getCount(key);
    target.setCount(key, Math.log(original));
  }
}
//
// Selection Operators
//
/**
 * Deletes the {@code top} highest-count keys and the {@code bottom}
 * lowest-count keys from the counter, using the descending order produced by
 * {@link #toSortedList(Counter)}.
 *
 * Nothing is removed when {@code top + bottom <= 0}. A negative {@code top}
 * or {@code bottom} is treated as zero, and requests that exceed the number
 * of available keys are limited to the keys that exist.
 *
 * @param c The counter to purge; it is modified
 * @param top How many of the highest-count keys to remove
 * @param bottom How many of the lowest-count keys to remove
 * @return The removed keys: first the top keys from highest count downwards,
 *         then the bottom keys from lowest count upwards
 */
public static List deleteOutofRange(Counter c, int top, int bottom) {
  List purgedItems = new ArrayList();
  int numToPurge = top + bottom;
  if (numToPurge <= 0) {
    return purgedItems;
  }
  List l = Counters.toSortedList(c);
  // All positions refer to the sorted snapshot, whose size stays fixed while
  // keys are removed from the counter itself.
  int total = l.size();
  int fromTop = Math.min(Math.max(top, 0), total);
  int fromBottom = Math.min(Math.max(bottom, 0), total - fromTop);
  for (int i = 0; i < fromTop; i++) {
    E item = l.get(i);
    purgedItems.add(item);
    c.remove(item);
  }
  for (int i = total - 1; i >= total - fromBottom; i--) {
    E item = l.get(i);
    purgedItems.add(item);
    c.remove(item);
  }
  return purgedItems;
}
/**
 * Removes all entries from c except for the top {@code num}. Keys are
 * sorted by ascending count and the first {@code c.size() - num} of them are
 * removed; nothing happens when the counter has at most {@code num} entries.
 *
 * @param c The counter to trim; it is modified
 * @param num The number of highest-count entries to keep
 */
public static void retainTop(Counter c, int num) {
  int surplus = c.size() - num;
  if (surplus <= 0) {
    return;
  }
  List ascendingKeys = Counters.toSortedList(c, true);
  for (int position = 0; position < surplus; position++) {
    c.remove(ascendingKeys.get(position));
  }
}
/**
 * Removes all entries from c except for the top {@code num}, using the
 * ordering of {@code toSortedListKeyComparable} (which also compares keys
 * when counts are equal). That list is reversed and its first
 * {@code c.size() - num} keys are removed.
 *
 * @param c The counter to trim; it is modified
 * @param num The number of entries to keep
 */
public static > void retainTopKeyComparable(Counter c, int num) {
  int surplus = c.size() - num;
  if (surplus <= 0) {
    return;
  }
  List ordered = Counters.toSortedListKeyComparable(c);
  // Flip the list so that the keys to discard come first.
  Collections.reverse(ordered);
  for (int position = 0; position < surplus; position++) {
    c.remove(ordered.get(position));
  }
}
/**
 * Removes all entries from c except for the bottom {@code num}. Keys are
 * taken from {@link #toSortedList(Counter)} (highest count first) and the
 * first {@code c.size() - num} of them are removed.
 *
 * @param c The counter to trim; it is modified
 * @param num The number of lowest-count entries to keep
 * @return The removed keys in the order they were removed; an empty list
 *         when nothing had to be removed
 */
public static List retainBottom(Counter c, int num) {
  int surplus = c.size() - num;
  if (surplus <= 0) {
    return Generics.newArrayList();
  }
  List discarded = new ArrayList();
  List descendingKeys = Counters.toSortedList(c);
  for (int position = 0; position < surplus; position++) {
    E victim = descendingKeys.get(position);
    discarded.add(victim);
    c.remove(victim);
  }
  return discarded;
}
/**
 * Removes all entries whose count equals 0.0 from the counter, returning the
 * set of removed keys.
 *
 * @param counter The counter to clean; it is modified
 * @return The keys that were removed
 */
public static Set retainNonZeros(Counter counter) {
  Set zeroKeys = Generics.newHashSet();
  // Collect first, remove afterwards, so the key set is not changed while it
  // is being iterated.
  for (E candidate : counter.keySet()) {
    if (counter.getCount(candidate) == 0.0) {
      zeroKeys.add(candidate);
    }
  }
  for (E zeroKey : zeroKeys) {
    counter.remove(zeroKey);
  }
  return zeroKeys;
}
/**
 * Removes all entries with counts below the given threshold, returning the
 * set of removed keys.
 *
 * @param counter The counter; it is modified.
 * @param countThreshold
 *          The minimum count for an entry to be kept. Entries (strictly) less
 *          than this threshold are discarded.
 * @return The set of discarded keys.
 */
public static Set retainAbove(Counter counter, double countThreshold) {
  Set discarded = Generics.newHashSet();
  // Collect first, remove afterwards, so the key set is not changed while it
  // is being iterated.
  for (E candidate : counter.keySet()) {
    boolean belowThreshold = counter.getCount(candidate) < countThreshold;
    if (belowThreshold) {
      discarded.add(candidate);
    }
  }
  for (E victim : discarded) {
    counter.remove(victim);
  }
  return discarded;
}
/**
 * Removes all (first key, second key) entries of a two-dimensional counter
 * whose counts are below the given threshold, returning the set of removed
 * key pairs.
 *
 * @param counter The counter; it is modified.
 * @param countThreshold
 *          The minimum count for an entry to be kept. Entries (strictly) less
 *          than this threshold are discarded.
 * @return The set of discarded key pairs.
 */
// NOTE(review): generic type arguments appear to have been stripped from this
// listing (e.g. "Set>"); they need to be restored before this compiles.
public static Set> retainAbove(
TwoDimensionalCounter counter, double countThreshold) {
Set> removed = new HashSet>();
// First pass: collect every key pair whose count is strictly below the threshold.
for (Entry> en : counter.entrySet()) {
for (Entry en2 : en.getValue().entrySet()) {
if (counter.getCount(en.getKey(), en2.getKey()) < countThreshold) {
removed.add(new Pair(en.getKey(), en2.getKey()));
}
}
}
// Second pass: remove the collected pairs, after iteration over the counter has finished.
for (Pair key : removed) {
counter.remove(key.first(), key.second());
}
return removed;
}
/**
 * Removes all entries with counts above the given threshold, returning the
 * removed entries as a new counter that maps each removed key to the count
 * it had.
 *
 * @param counter The counter; it is modified.
 * @param countMaxThreshold
 *          The maximum count for an entry to be kept. Entries (strictly) more
 *          than this threshold are discarded.
 * @return A counter holding the discarded entries.
 */
public static Counter retainBelow(Counter counter, double countMaxThreshold) {
  Counter discarded = new ClassicCounter();
  // Collect first, remove afterwards, so the key set is not changed while it
  // is being iterated.
  for (E candidate : counter.keySet()) {
    double value = counter.getCount(candidate);
    if (value > countMaxThreshold) {
      discarded.setCount(candidate, value);
    }
  }
  for (E victim : discarded.keySet()) {
    counter.remove(victim);
  }
  return discarded;
}
/**
 * Removes all entries whose key does not fully match at least one of the
 * given patterns ({@code Matcher.matches} is used, so the whole key must
 * match).
 *
 * @param counter The counter; it is modified.
 * @param matchPatterns patterns for a key to match
 * @return The set of discarded keys.
 */
public static Set retainMatchingKeys(Counter counter, List matchPatterns) {
  Set discarded = Generics.newHashSet();
  for (String candidate : counter.keySet()) {
    boolean anyPatternMatches = false;
    for (Pattern candidatePattern : matchPatterns) {
      anyPatternMatches = candidatePattern.matcher(candidate).matches();
      if (anyPatternMatches) {
        break;
      }
    }
    if (!anyPatternMatches) {
      discarded.add(candidate);
    }
  }
  for (String victim : discarded) {
    counter.remove(victim);
  }
  return discarded;
}
/**
 * Removes all entries whose key is not contained in the given collection of
 * keys.
 *
 * @param counter The counter; it is modified
 * @param matchKeys Keys to keep
 * @return The set of discarded keys.
 */
public static Set retainKeys(Counter counter, Collection matchKeys) {
  Set discarded = Generics.newHashSet();
  // Collect first, remove afterwards, so the key set is not changed while it
  // is being iterated.
  for (E candidate : counter.keySet()) {
    if (!matchKeys.contains(candidate)) {
      discarded.add(candidate);
    }
  }
  for (E victim : discarded) {
    counter.remove(victim);
  }
  return discarded;
}
/**
 * Removes all entries with keys in the given collection.
 *
 * @param counter The counter to remove from; it is modified
 * @param removeKeysCollection The keys to remove
 */
public static void removeKeys(Counter counter, Collection removeKeysCollection) {
  for (E unwanted : removeKeysCollection) {
    counter.remove(unwanted);
  }
}
/**
 * Removes all entries with keys (first key set) in the given collection.
 *
 * @param counter The two-dimensional counter to remove from; it is modified
 * @param removeKeysCollection The first-level keys to remove
 */
public static void removeKeys(TwoDimensionalCounter counter, Collection removeKeysCollection) {
  for (E unwanted : removeKeysCollection) {
    counter.remove(unwanted);
  }
}
/**
 * Returns the set of keys whose counts are at or above the given threshold.
 * This set may have 0 elements but will not be null.
 *
 * @param c The Counter to examine
 * @param countThreshold
 *          Items equal to or above this number are kept
 * @return A (non-null) Set of keys whose counts are at or above the given
 *         threshold.
 */
public static Set keysAbove(Counter c, double countThreshold) {
  Set selected = Generics.newHashSet();
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    if (value >= countThreshold) {
      selected.add(candidate);
    }
  }
  return selected;
}
/**
 * Returns the set of keys whose counts are at or below the given threshold.
 * This set may have 0 elements but will not be null.
 *
 * @param c The Counter to examine
 * @param countThreshold Items equal to or below this number are kept
 * @return A (non-null) Set of keys whose counts are at or below the given
 *         threshold.
 */
public static Set keysBelow(Counter c, double countThreshold) {
  Set selected = Generics.newHashSet();
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    if (value <= countThreshold) {
      selected.add(candidate);
    }
  }
  return selected;
}
/**
 * Returns the set of keys that have exactly the given count (compared with
 * {@code ==}). This set may have 0 elements but will not be null.
 *
 * @param c The Counter to examine
 * @param count The exact count a key must have to be selected
 * @return A (non-null) Set of keys whose count equals {@code count}.
 */
public static Set keysAt(Counter c, double count) {
  Set selected = Generics.newHashSet();
  for (E candidate : c.keySet()) {
    double value = c.getCount(candidate);
    if (value == count) {
      selected.add(candidate);
    }
  }
  return selected;
}
//
// Transforms
//
/**
 * Returns a new counter whose keys are the keys of c mapped through f. Eager
 * evaluation. If two keys are the same after the transformation, the value
 * set last wins, which depends on the order in which the key set is
 * traversed.
 *
 * @param c The counter to transform (not modified)
 * @param f The function applied to each key
 * @return A new ClassicCounter with the transformed keys
 */
public static Counter transform(Counter c, Function f) {
  Counter transformed = new ClassicCounter();
  for (T1 originalKey : c.keySet()) {
    transformed.setCount(f.apply(originalKey), c.getCount(originalKey));
  }
  return transformed;
}
/**
 * Returns a new counter whose keys are the keys of c mapped through f. If two
 * keys are the same after the transformation, their values get added up.
 *
 * @param c The counter to transform (not modified)
 * @param f The function applied to each key
 * @return A new ClassicCounter with the transformed keys
 */
public static Counter transformWithValuesAdd(Counter c, Function f) {
  Counter transformed = new ClassicCounter();
  for (T1 originalKey : c.keySet()) {
    transformed.incrementCount(f.apply(originalKey), c.getCount(originalKey));
  }
  return transformed;
}
//
// Conversion to other types
//
/**
 * Returns a comparator backed by this counter: two objects are compared by
 * their associated values stored in the counter, in ascending numeric order.
 * The counter is consulted at every comparison, so the ordering is not fixed
 * but follows the mutable values stored in the Counter. The type of the key
 * plays no role in the comparison.
 *
 * @param counter The Counter whose values are used for ordering the keys
 * @return A Comparator using this ordering
 */
public static Comparator toComparator(final Counter counter) {
  return (left, right) -> {
    double leftCount = counter.getCount(left);
    double rightCount = counter.getCount(right);
    return Double.compare(leftCount, rightCount);
  };
}
/**
 * Returns a comparator backed by this counter: two objects are compared by
 * their associated values stored in the counter, in ascending numeric order;
 * when the counts compare as equal the keys' own {@code compareTo} decides.
 * The counter is consulted at every comparison, so the ordering is not fixed
 * but follows the mutable values stored in the Counter.
 *
 * @param counter The Counter whose values are used for ordering the keys
 * @return A Comparator using this ordering
 */
public static > Comparator toComparatorWithKeys(final Counter counter) {
  return (o1, o2) -> {
    int byCount = Double.compare(counter.getCount(o1), counter.getCount(o2));
    return (byCount != 0) ? byCount : o1.compareTo(o2);
  };
}
/**
 * Returns a comparator backed by this counter: two objects are compared by
 * their associated values stored in the counter, in descending numeric order.
 * The counter is consulted at every comparison, so the ordering is not fixed
 * but follows the mutable values stored in the Counter. The type of the key
 * plays no role in the comparison.
 *
 * @param counter The Counter whose values are used for ordering the keys
 * @return A Comparator using this ordering
 */
public static Comparator toComparatorDescending(final Counter counter) {
  return (left, right) -> {
    double rightCount = counter.getCount(right);
    double leftCount = counter.getCount(left);
    // Arguments are swapped to obtain the descending order.
    return Double.compare(rightCount, leftCount);
  };
}
/**
 * Returns a comparator for sorting a Counter's keys by their count or, when
 * {@code useMagnitude} is set, by the absolute value of their count. If
 * {@code ascending} is true, smaller values come first, otherwise larger
 * values come first.
 *
 * Sample usage:
 *
 * {@code List biggestAbsKeys = new ArrayList(c.keySet());}
 * {@code Collections.sort(biggestAbsKeys, Counters.toComparator(c, false, true));}
 *
 * @param counter The Counter whose values are used for ordering the keys
 * @param ascending Whether smaller values are ordered first
 * @param useMagnitude Whether to compare absolute values instead of raw counts
 * @return A Comparator using this ordering
 */
public static Comparator toComparator(final Counter counter, final boolean ascending, final boolean useMagnitude) {
  return (o1, o2) -> {
    double first = counter.getCount(o1);
    double second = counter.getCount(o2);
    if (useMagnitude) {
      first = Math.abs(first);
      second = Math.abs(second);
    }
    // Swapping the arguments flips the order for the descending case.
    return ascending ? Double.compare(first, second) : Double.compare(second, first);
  };
}
/**
 * A List of the keys in c, sorted from highest count to lowest.
 * So note that the default is descending!
 *
 * @param c The counter whose keys are sorted (not modified)
 * @return A List of the keys in c, sorted from highest count to lowest.
 */
public static List toSortedList(Counter c) {
  final boolean ascending = false;
  return toSortedList(c, ascending);
}
/**
 * A new List of the keys in c, sorted by count: from lowest to highest when
 * {@code ascending} is true, from highest to lowest otherwise.
 *
 * @param c The counter whose keys are sorted (not modified)
 * @param ascending Whether the lowest counts come first
 * @return A List of the keys in c in the requested order.
 */
public static List toSortedList(Counter c, boolean ascending) {
  Comparator order;
  if (ascending) {
    order = toComparator(c);
  } else {
    order = toComparatorDescending(c);
  }
  List sortedKeys = new ArrayList(c.keySet());
  Collections.sort(sortedKeys, order);
  return sortedKeys;
}
/**
 * A new List of the keys in c, sorted from highest count to lowest. The keys
 * are first sorted with {@code toComparatorWithKeys} (ascending count, equal
 * counts ordered by the keys' {@code compareTo}) and the list is then
 * reversed.
 *
 * @param c The counter whose keys are sorted (not modified)
 * @return A List of the keys in c, sorted from highest count to lowest.
 */
public static > List toSortedListKeyComparable(Counter c) {
  Comparator ascendingWithKeys = toComparatorWithKeys(c);
  List sortedKeys = new ArrayList(c.keySet());
  Collections.sort(sortedKeys, ascendingWithKeys);
  Collections.reverse(sortedKeys);
  return sortedKeys;
}
/**
 * Converts a counter to ranks; ranks start from 0. The rank of a key is its
 * position in {@link #toSortedList(Counter)}, i.e. the highest count gets
 * rank 0.
 *
 * @param c The counter to rank (not modified)
 * @return A counter where the count is the rank in the original counter
 */
public static IntCounter toRankCounter(Counter c) {
  List keysByRank = toSortedList(c);
  IntCounter ranks = new IntCounter();
  for (int rank = 0; rank < keysByRank.size(); rank++) {
    ranks.setCount(keysByRank.get(rank), rank);
  }
  return ranks;
}
/**
 * Converts a counter to tied ranks; ranks start from 1. Keys are taken in the
 * order produced by {@code toSortedListWithCounts}; a run of consecutive keys
 * with equal counts shares the average of the ranks the run occupies.
 *
 * @param c The counter to rank
 * @return A counter where the count is the rank in the original counter; when values are tied, the rank is the average of the ranks of the tied values
 */
// NOTE(review): generic type arguments appear to have been stripped from this
// listing (e.g. "List>", "Iterator>"); they need to be restored before this compiles.
public static Counter toTiedRankCounter(Counter c) {
Counter rankCounter = new ClassicCounter();
List> sortedList = toSortedListWithCounts(c);
// i is the zero-based position of the current element; it is advanced by
// hand so that it stays in step with the iterator below.
int i = 0;
Iterator> it = sortedList.iterator();
while(it.hasNext()) {
Pair iEn = it.next();
double icount = iEn.second();
E iKey = iEn.first();
// l collects the one-based ranks of the current run of equal counts,
// keys collects the keys at those ranks.
List l = new ArrayList();
List keys = new ArrayList();
l.add(i+1);
keys.add(iKey);
// Look ahead for directly following elements with the same count.
for(int j = i +1; j < sortedList.size(); j++){
Pair jEn = sortedList.get(j);
if( icount == jEn.second()){
l.add(j+1);
keys.add(jEn.first());
}else
break;
}
if(l.size() > 1){
// A tie: every key of the run receives the average of the run's ranks.
double sum = 0;
for(Integer d: l)
sum += d;
double avgRank = sum/l.size();
for(int k = 0; k < l.size(); k++){
rankCounter.setCount(keys.get(k), avgRank);
// Skip the iterator past the remaining members of the run; the first
// member was already consumed by the it.next() at the top of the loop.
if(k != l.size()-1 && it.hasNext())
it.next();
i++;
}
}else{
// No tie: the rank is simply the one-based position.
rankCounter.setCount(iKey, i+1);
i++;
}
}
return rankCounter;
}
/**
 * A List of the keys in c paired with their counts, sorted by the absolute
 * value of the count from largest to smallest.
 *
 * @param c The counter whose entries are listed
 * @return A new List of (key, count) pairs in descending magnitude order.
 */
// NOTE(review): generic type arguments appear to have been stripped from this
// listing (e.g. "List>"); they need to be restored before this compiles.
public static List> toDescendingMagnitudeSortedListWithCounts(Counter c) {
List keys = new ArrayList(c.keySet());
// ascending = false, useMagnitude = true: larger absolute counts come first.
Collections.sort(keys, toComparator(c, false, true));
List> l = new ArrayList>(keys.size());
for (E key : keys) {
l.add(new Pair(key, c.getCount(key)));
}
return l;
}
/**
* A List of the keys in c, sorted from highest count to lowest, paired with
* counts
*
* @return A List of the keys in c, sorted from highest count to lowest.
*/
public static List> toSortedListWithCounts(Counter c) {
List