edu.isi.nlp.collections.MultisetUtils Maven / Gradle / Ivy
The newest version!
package edu.isi.nlp.collections;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultiset;
import java.util.Comparator;
/**
* Utilities for working with Guava's {@link com.google.common.collect.Multiset}s.
*
* @author Ryan Gabbard
*/
public final class MultisetUtils {
private MultisetUtils() {
throw new UnsupportedOperationException();
}
/**
* Returns a {@link Multiset} whose elements are the counts of the elements in the input {@link
* Multiset}. This is most useful for generating histograms. The histogram elements will be in
* ascending order.
*/
public static Multiset histogram(Multiset data) {
return histogram(data, Ordering.natural());
}
/**
* Returns a {@link Multiset} whose elements are the counts of the elements in the input {@link
* Multiset}. This is most useful for generating histograms. Whether the histogram elements are in
* ascending order or descending order is controlled by {@code comparator}.
*/
public static Multiset histogram(
Multiset data, Comparator super Integer> comparator) {
final TreeMultiset histogram = TreeMultiset.create(comparator);
for (final Multiset.Entry entry : data.entrySet()) {
histogram.add(entry.getCount());
}
return histogram;
}
/**
* Retrieves all elements from a {@link Multiset} whose count is greater than or equal to {@code
* minCount} (which must be non-negative).
*/
public static Iterable elementsOccuringAtLeast(final Multiset data, int minCount) {
checkArgument(minCount >= 0);
return FluentIterable.from(data.entrySet())
.filter(MultisetUtils.occursAtLeast(minCount))
.transform(MultisetUtils.elementOnly());
}
/** Guava {@link Function} mapping a multiset entry to the element it wraps. */
public static Function, T> elementOnly() {
return new ElementOnly<>();
}
private static final class ElementOnly implements Function, T> {
@Override
public T apply(Multiset.Entry entry) {
return entry.getElement();
}
}
/**
* Guava {@link Predicate} on {@link Multiset.Entry} which passes only if the entry's count is
* greater than or equal to {@code n}, which must be non-negative.
*/
public static Predicate> occursAtLeast(int n) {
checkArgument(n >= 0);
return new OccursAtLeast<>(n);
}
private static final class OccursAtLeast implements Predicate> {
OccursAtLeast(int n) {
this.n = n;
}
@Override
public boolean apply(Multiset.Entry entry) {
return entry.getCount() >= n;
}
private final int n;
}
/**
* An ordering of {@link Multiset} elements by descending order of count, with ties broken
* according to the supplied {@code itemComparator}.
*/
public static Ordering> byCountDescendingThenItemAscendingOrdering(
Comparator super E> itemComparator) {
final Ordering> byCount = byCountOrdering();
final Ordering> byCountReversed = byCount.reverse();
final Ordering> byElement = byElementOrdering(itemComparator);
return byCountReversed.compound(byElement);
}
public static Function, Integer> toCountFunction() {
return new ToCountFunction<>();
}
private static class ToCountFunction implements Function, Integer> {
@Override
public Integer apply(final Multiset.Entry input) {
return input.getCount();
}
}
/**
* Returns the partial {@link Ordering} over {@link Multiset.Entry}s resulting from applying the
* supplied {@code comparator} to the multiset elements.
*/
public static Ordering> byElementOrdering(
Comparator super ElementType> comparator) {
return Ordering.from(comparator).onResultOf(MultisetUtils.elementOnly());
}
/** Returns a partial {@link Ordering} over {@link Multiset.Entry}s by their count. */
public static Ordering> byCountOrdering() {
return Ordering.natural().onResultOf(MultisetUtils.toCountFunction());
}
/**
* Gets the maximum count of any element in a {@link Multiset}. This will be zero if the {@code
* Multiset} is empty.
*/
public static int maxCount(final Multiset> multiset) {
int ret = 0;
for (final Multiset.Entry> entry : multiset.entrySet()) {
if (entry.getCount() > ret) {
ret = entry.getCount();
}
}
return ret;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy