All downloads are free. Search and download functionality uses the official Maven repository.

edu.isi.nlp.collections.CollectionUtils Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.collections;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.math.IntMath;
import java.math.RoundingMode;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Utilities for collections.
 *
 * @author Ryan Gabbard, Constantine Lignos
 */
public final class CollectionUtils {

  /**
   * Not meant to be instantiated: the constructor is private and always throws {@link
   * UnsupportedOperationException}.
   */
  private CollectionUtils() {
    throw new UnsupportedOperationException();
  }

  /**
   * Takes some collections and creates a map from their elements to which collections contain them.
   * The Collections must be disjoint or an {@link java.lang.IllegalArgumentException} will be
   * thrown.
   */
  public static > Map makeElementsToContainersMap(
      final Iterable collections) {
    final ImmutableMap.Builder ret = ImmutableMap.builder();

    for (final C collection : collections) {
      for (final T item : collection) {
        ret.put(item, collection);
      }
    }

    return ret.build();
  }

  /**
   * Takes some collections and creates a {@link com.google.common.collect.ListMultimap} from their
   * elements to which collections contain them. Unlike {@link
   * #makeElementsToContainersMap(Iterable)}, the same element may appear in multiple collections.
   * However, {@code null} may not appear in any of the collections. The returned multimap is a
   * {@link com.google.common.collect.ListMultimap} to avoid having to do potentially expensive
   * comparisons between the sets; we know there will be no duplicates because the input collections
   * are sets.
   */
  public static >
      ImmutableListMultimap makeSetElementsToContainersMultimap(final Iterable sets) {
    // because these are sets we can safely use a list multimap without having to worry
    // about duplicates
    final ImmutableListMultimap.Builder ret = ImmutableListMultimap.builder();

    for (final C set : sets) {
      for (final V item : set) {
        ret.put(item, set);
      }
    }

    return ret.build();
  }

  /**
   * Returns a new Multiset resulting from transforming each element of the input Multiset by a
   * function. If two or more elements are mapped to the same value by the function, their counts
   * will be summed in the new Multiset.
   *
   * @param ms the multiset to transform
   * @param func the function applied once to each distinct element of {@code ms}
   * @param <A> the element type of the input
   * @param <B> the element type of the result
   * @return an immutable multiset of the transformed elements with their accumulated counts
   */
  public static <A, B> ImmutableMultiset<B> transformedCopy(Multiset<A> ms, Function<A, B> func) {
    final ImmutableMultiset.Builder<B> ret = ImmutableMultiset.builder();

    // iterate over entries so the function runs once per distinct element, not once per occurrence
    for (final Multiset.Entry<A> entry : ms.entrySet()) {
      final B transformedElement = func.apply(entry.getElement());
      ret.addCopies(transformedElement, entry.getCount());
    }

    return ret.build();
  }

  /**
   * Same as {@link #transformedCopy(Multiset, Function)}, except the returned Multiset is mutable
   * (a {@link HashMultiset}).
   *
   * @param ms the multiset to transform
   * @param func the function applied once to each distinct element of {@code ms}
   * @param <A> the element type of the input
   * @param <B> the element type of the result
   * @return a new mutable multiset of the transformed elements with their accumulated counts
   */
  public static <A, B> Multiset<B> mutableTransformedCopy(Multiset<A> ms, Function<A, B> func) {
    final Multiset<B> ret = HashMultiset.create();

    // iterate over entries so the function runs once per distinct element, not once per occurrence
    for (final Multiset.Entry<A> entry : ms.entrySet()) {
      final B transformedElement = func.apply(entry.getElement());
      ret.add(transformedElement, entry.getCount());
    }

    return ret;
  }

  /**
   * Gets a sublist of a list, truncated at the end of the list if too many elements are selected.
   * This behaves exactly like {@link List#subList(int, int)}, including all notes in its Javadoc
   * concerning structural modification of the backing List, etc. with one difference: if the end
   * index is beyond the end of the list, instead of throwing an exception, the sublist simply
   * stops at the end of the list.
   *
   * @param inList the list to take a view of
   * @param start the index of the first element to include
   * @param end the index after the last element to include; clamped to {@code inList.size()}
   * @param <T> the element type
   * @return the view returned by {@code inList.subList(start, min(end, inList.size()))}
   */
  public static <T> List<T> truncatedSubList(List<T> inList, int start, int end) {
    // List.subList will do the remaining error checking (e.g. on start) for us
    final int limit = Math.min(end, inList.size());
    return inList.subList(start, limit);
  }

  /**
   * Returns true if and only if all the collections in the provided list have the same size.
   * Returns true if the provided list is empty.
   */
  public static boolean allSameSize(List> collections) {
    if (collections.isEmpty()) {
      return true;
    }
    final int referenceSize = collections.get(0).size();
    for (final Collection col : collections) {
      if (col.size() != referenceSize) {
        return false;
      }
    }
    return true;
  }

  /** Guava function to get the value of a {@link com.google.common.collect.Table} cell. */
  public static  Function, V> TableCellValue() {
    return new Function, V>() {
      @Override
      public V apply(Table.Cell input) {
        return input.getValue();
      }
    };
  }

  private static final Function, Integer> SIZE =
      new Function, Integer>() {
        @Override
        public Integer apply(Collection input) {
          return input.size();
        }
      };

  /** Guava function to map a collection to its size. */
  public static Function, Integer> sizeFunction() {
    return SIZE;
  }

  /**
   * Like {@link Collections#max(java.util.Collection)} except with a default value returned in the
   * case of an empty collection.
   */
  public static > T maxOr(Collection values, T defaultVal) {
    if (values.isEmpty()) {
      return defaultVal;
    } else {
      return Collections.max(values);
    }
  }

  /**
   * Like {@link Collections#min(java.util.Collection)} except with a default value returned in the
   * case of an empty collection.
   */
  public static > T minOr(Collection values, T defaultVal) {
    if (values.isEmpty()) {
      return defaultVal;
    } else {
      return Collections.min(values);
    }
  }

  /**
   * Provides a {@link com.google.common.base.Function} which will create a set containing the same
   * elements as the supplied collection.
   */
  public static  Function, ImmutableSet> asSetFunction() {
    return new Function, ImmutableSet>() {
      @Override
      public ImmutableSet apply(Collection input) {
        return ImmutableSet.copyOf(input);
      }
    };
  }

  /**
   * A copy of the input as an {@link com.google.common.collect.ImmutableList} which respects
   * iteration order, but where only the first occurrence of each element is kept. No input items
   * may be {@code null}.
   *
   * @param items the items to copy
   * @param <T> the element type
   * @return the list view of an {@link ImmutableSet} copy of {@code items}
   */
  public static <T> ImmutableList<T> asUniquedList(Iterable<T> items) {
    // the set copy drops repeats; asList() exposes the set's iteration order as a list
    return ImmutableSet.copyOf(items).asList();
  }

  /**
   * Turns null into an empty list and leaves other inputs untouched.
   *
   * @param list the list to check; may be {@code null}
   * @param <T> the element type
   * @return {@code list} itself when it is non-null, otherwise an empty immutable list
   */
  public static <T> List<T> coerceNullToEmpty(List<T> list) {
    // the explicit type witness keeps this compiling without relying on target-type inference
    return MoreObjects.firstNonNull(list, ImmutableList.<T>of());
  }

  /** A Guava {@link Predicate} which calls {@link Collection#isEmpty()} on provided collections. */
  public static Predicate> isEmptyPredicate() {
    return new Predicate>() {
      @Override
      public boolean apply(final Collection input) {
        return input.isEmpty();
      }
    };
  }

  /**
   * Partitions a list into the specified number of partitions as evenly as is possible. The final
   * "extra" elements that cannot be evenly distributed are distributed starting with the first
   * partitions. For example, three partitions of (1, 2, 3, 4) results in ((1, 4), (2), (3)). Unlike
   * {@link Lists#partition(List, int)}, this returns {@link ImmutableList}s, not list views, and
   * computations are computed eagerly.
   *
   * @param partitions the number of partitions to divide the list into
   * @return a list of the partitions, which are themselves lists
   */
  public static  ImmutableList> partitionAlmostEvenly(
      final List list, final int partitions) {
    checkNotNull(list);
    checkArgument(partitions > 0, "Number of partitions must be positive");
    checkArgument(
        partitions <= list.size(), "Cannot request more partitions than there are list items");

    // Divide into partitions, with the remainder falling into the extra partitions
    final List> prelimPartitions =
        Lists.partition(list, IntMath.divide(list.size(), partitions, RoundingMode.DOWN));
    // Create output
    final ImmutableList.Builder> ret = ImmutableList.builder();

    // If we evenly partitioned, just do the type conversion and return. The type conversion is
    // performed because Lists#partition returns list views.
    if (prelimPartitions.size() == partitions) {
      for (List partition : prelimPartitions) {
        ret.add(ImmutableList.copyOf(partition));
      }
    } else {
      // Otherwise, distribute the extras

      // Make a builder for each output partition and the extras. The extras are anything in the
      // preliminary partitions after the number of partitions we actually want. Thus, the
      // first index of extras is the same as the number of partitions.
      final ImmutableList.Builder> builderOfBuilders =
          ImmutableList.builder();
      final ImmutableList.Builder extrasBuilder = ImmutableList.builder();
      for (int i = 0; i < prelimPartitions.size(); i++) {
        if (i < partitions) {
          builderOfBuilders.add(ImmutableList.builder().addAll(prelimPartitions.get(i)));
        } else {
          extrasBuilder.addAll(prelimPartitions.get(i));
        }
      }
      final ImmutableList> builders = builderOfBuilders.build();
      final ImmutableList extras = extrasBuilder.build();

      // Distribute the extra elements. We cannot overrun the bounds of builders because the number
      // of extras is always at least one less than the number of builders (otherwise, we would've
      // just had larger partitions).
      int partitionIdx = 0;
      for (E item : extras) {
        builders.get(partitionIdx++).add(item);
      }

      // Fill in output
      for (ImmutableList.Builder builder : builders) {
        ret.add(builder.build());
      }
    }

    final ImmutableList> finalPartitions = ret.build();
    checkState(
        finalPartitions.size() == partitions,
        "Partitioning failed: number of output partitions ("
            + finalPartitions.size()
            + ") does not match requested number ("
            + partitions
            + ")");

    return finalPartitions;
  }

  /**
   * Checks if two {@link Iterable}s contain the same elements regardless of order or number of
   * occurrences. If they do, nothing happens. If they don't an {@link IllegalStateException} is
   * raised describing the difference. The exception's message will be prefixed by {@code
   * msgIntroduction} and {@code leftName} and {@code rightName} will be used to describe the left
   * and right element types.
   *
   * <p>Example:
   *
   * <pre>{@code
   * assertSameElementsOrIllegalState(mySet1, mySet2, "Document IDs do not match: ",
   *     "gold docIDs", "test docIDs");
   * }</pre>
   *
   * @param left the first group of elements
   * @param right the second group of elements
   * @param msgIntroduction the text the exception message starts with
   * @param leftName how to describe elements found only in {@code left}
   * @param rightName how to describe elements found only in {@code right}
   * @throws IllegalStateException if the two groups do not contain the same elements
   */
  public static void assertSameElementsOrIllegalState(
      Iterable<?> left,
      Iterable<?> right,
      String msgIntroduction,
      String leftName,
      String rightName) {
    final Optional<String> exceptionMessage =
        assertSameElementsCommon(left, right, msgIntroduction, leftName, rightName);
    if (exceptionMessage.isPresent()) {
      throw new IllegalStateException(exceptionMessage.get());
    }
  }

  /**
   * Checks if two {@link Iterable}s contain the same elements regardless of order or number of
   * occurrences. Identical to {@link #assertSameElementsOrIllegalState(Iterable, Iterable, String,
   * String, String)} except that an {@link IllegalArgumentException} is raised on a mismatch.
   *
   * @param left the first group of elements
   * @param right the second group of elements
   * @param msgIntroduction the text the exception message starts with
   * @param leftName how to describe elements found only in {@code left}
   * @param rightName how to describe elements found only in {@code right}
   * @throws IllegalArgumentException if the two groups do not contain the same elements
   */
  public static void assertSameElementsOrIllegalArgument(
      Iterable<?> left,
      Iterable<?> right,
      String msgIntroduction,
      String leftName,
      String rightName) {
    final Optional<String> exceptionMessage =
        assertSameElementsCommon(left, right, msgIntroduction, leftName, rightName);
    if (exceptionMessage.isPresent()) {
      throw new IllegalArgumentException(exceptionMessage.get());
    }
  }

  /**
   * Shared comparison behind the {@code assertSameElements...} methods. Both inputs are copied
   * into sets, so order and repeat counts are ignored.
   *
   * @return the message describing the left-only and right-only elements if the sets differ, or
   *     {@link Optional#absent()} if they are equal
   */
  private static Optional<String> assertSameElementsCommon(
      final Iterable<?> left,
      final Iterable<?> right,
      final String msgIntroduction,
      final String leftName,
      final String rightName) {
    final Optional<String> exceptionMessage;
    final ImmutableSet<?> leftSet = ImmutableSet.copyOf(left);
    final ImmutableSet<?> rightSet = ImmutableSet.copyOf(right);

    if (!leftSet.equals(rightSet)) {
      final StringBuilder exceptionMsg = new StringBuilder();
      final ImmutableSet<?> leftOnly = Sets.difference(leftSet, rightSet).immutableCopy();
      final ImmutableSet<?> rightOnly = Sets.difference(rightSet, leftSet).immutableCopy();

      exceptionMsg.append(msgIntroduction);
      // each side is only mentioned when it actually has unmatched elements
      if (!leftOnly.isEmpty()) {
        exceptionMsg
            .append(" ")
            .append(leftOnly.size())
            .append(" ")
            .append(leftName)
            .append(" only: ")
            .append(leftOnly)
            .append(". ");
      }
      if (!rightOnly.isEmpty()) {
        exceptionMsg
            .append(" ")
            .append(rightOnly.size())
            .append(" ")
            .append(rightName)
            .append(" only: ")
            .append(rightOnly)
            .append(". ");
      }
      exceptionMessage = Optional.of(exceptionMsg.toString());
    } else {
      exceptionMessage = Optional.absent();
    }
    return exceptionMessage;
  }

  /**
   * Acts like {@code items.toString()} unless {@code items} has more elements than {@code limit},
   * in which case it prints only the first {@code limit} items followed by the number of excess
   * items. This is useful for making toStrings for objects which may contain large collections.
   *
   * @param items the set to render
   * @param limit the maximum number of elements to print in full
   * @return the full or abbreviated string form of {@code items}
   */
  public static String toStringLimited(ImmutableSet<?> items, int limit) {
    if (items.size() <= limit) {
      return items.toString();
    } else {
      return items.asList().subList(0, limit).toString()
          + "... ("
          + (items.size() - limit)
          + " more)";
    }
  }
}