All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.isi.nlp.collections.MultisetUtils Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.collections;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultiset;
import java.util.Comparator;

/**
 * Utilities for working with Guava'a {@link com.google.common.collect.Multiset}s.
 *
 * @author Ryan Gabbard
 */
public final class MultisetUtils {

  private MultisetUtils() {
    throw new UnsupportedOperationException();
  }

  /**
   * Returns a {@link Multiset} whose elements are the counts of the elements in the input {@link
   * Multiset}. This is most useful for generating histograms. The histogram elements will be in
   * ascending order.
   */
  public static  Multiset histogram(Multiset data) {
    return histogram(data, Ordering.natural());
  }

  /**
   * Returns a {@link Multiset} whose elements are the counts of the elements in the input {@link
   * Multiset}. This is most useful for generating histograms. Whether the histogram elements are in
   * ascending order or descending order is controlled by {@code comparator}.
   */
  public static  Multiset histogram(
      Multiset data, Comparator comparator) {
    final TreeMultiset histogram = TreeMultiset.create(comparator);

    for (final Multiset.Entry entry : data.entrySet()) {
      histogram.add(entry.getCount());
    }

    return histogram;
  }

  /**
   * Retrieves all elements from a {@link Multiset} whose count is greater than or equal to {@code
   * minCount} (which must be non-negative).
   */
  public static  Iterable elementsOccuringAtLeast(final Multiset data, int minCount) {
    checkArgument(minCount >= 0);
    return FluentIterable.from(data.entrySet())
        .filter(MultisetUtils.occursAtLeast(minCount))
        .transform(MultisetUtils.elementOnly());
  }

  /** Guava {@link Function} mapping a multiset entry to the element it wraps. */
  public static  Function, T> elementOnly() {
    return new ElementOnly<>();
  }

  private static final class ElementOnly implements Function, T> {

    @Override
    public T apply(Multiset.Entry entry) {
      return entry.getElement();
    }
  }

  /**
   * Guava {@link Predicate} on {@link Multiset.Entry} which passes only if the entry's count is
   * greater than or equal to {@code n}, which must be non-negative.
   */
  public static  Predicate> occursAtLeast(int n) {
    checkArgument(n >= 0);
    return new OccursAtLeast<>(n);
  }

  private static final class OccursAtLeast implements Predicate> {

    OccursAtLeast(int n) {
      this.n = n;
    }

    @Override
    public boolean apply(Multiset.Entry entry) {
      return entry.getCount() >= n;
    }

    private final int n;
  }

  /**
   * An ordering of {@link Multiset} elements by descending order of count, with ties broken
   * according to the supplied {@code itemComparator}.
   */
  public static  Ordering> byCountDescendingThenItemAscendingOrdering(
      Comparator itemComparator) {
    final Ordering> byCount = byCountOrdering();
    final Ordering> byCountReversed = byCount.reverse();
    final Ordering> byElement = byElementOrdering(itemComparator);
    return byCountReversed.compound(byElement);
  }

  public static  Function, Integer> toCountFunction() {
    return new ToCountFunction<>();
  }

  private static class ToCountFunction implements Function, Integer> {

    @Override
    public Integer apply(final Multiset.Entry input) {
      return input.getCount();
    }
  }

  /**
   * Returns the partial {@link Ordering} over {@link Multiset.Entry}s resulting from applying the
   * supplied {@code comparator} to the multiset elements.
   */
  public static  Ordering> byElementOrdering(
      Comparator comparator) {
    return Ordering.from(comparator).onResultOf(MultisetUtils.elementOnly());
  }

  /** Returns a partial {@link Ordering} over {@link Multiset.Entry}s by their count. */
  public static  Ordering> byCountOrdering() {
    return Ordering.natural().onResultOf(MultisetUtils.toCountFunction());
  }

  /**
   * Gets the maximum count of any element in a {@link Multiset}. This will be zero if the {@code
   * Multiset} is empty.
   */
  public static int maxCount(final Multiset multiset) {
    int ret = 0;
    for (final Multiset.Entry entry : multiset.entrySet()) {
      if (entry.getCount() > ret) {
        ret = entry.getCount();
      }
    }
    return ret;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy