// edu.stanford.nlp.util.Iterables -- Maven / Gradle / Ivy
package edu.stanford.nlp.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;
/**
 * Utility methods for working with {@link Iterable}s, in the same way that
 * {@link Collections} provides utility methods for {@link Collection}s.
 *
 * <p>NB: Some Iterables returned by methods in this class return Iterators that
 * assume a call to hasNext will precede each call to next. While this usage
 * does not conform to the Java Iterator specification, it works fine with,
 * e.g., the Java enhanced for-loop.
 *
 * <p>Methods from Iterators are merged into this class.
 *
 * @author dramage
 * @author dlwh {@link #flatMap(Iterable, Function)}
 * @author Huy Nguyen (e-mail address obscured in this copy of the source)
 */
public class Iterables {
/**
* Transformed view of the given iterable. Returns the output
* of the given function when applied to each element of the
* iterable.
*/
public static Iterable transform(
final Iterable iterable, final Function super K,? extends V> function) {
return new Iterable() {
public Iterator iterator() {
return new Iterator() {
Iterator inner = iterable.iterator();
public boolean hasNext() {
return inner.hasNext();
}
public V next() {
return function.apply(inner.next());
}
public void remove() {
inner.remove();
}
};
}
};
}
/**
 * Returns a filtered view of the given iterable containing only those
 * elements for which {@code accept} returns true.
 *
 * <p>The returned iterators look ahead: {@code hasNext()} pulls elements
 * from the underlying iterator until it finds an accepted one (or runs
 * out), and buffers it for the following {@code next()} call. Because of
 * that look-ahead, {@code remove()} is not supported.
 *
 * @param <T> element type
 * @param iterable the source of elements
 * @param accept predicate deciding which elements are kept
 * @return a view over the accepted elements
 */
public static <T> Iterable<T> filter(
    final Iterable<T> iterable, final Function<T, Boolean> accept) {
  return () -> new Iterator<T>() {
    private final Iterator<T> inner = iterable.iterator();
    /** True while {@code pending} holds an accepted, not yet returned element. */
    private boolean queued = false;
    private T pending = null;

    @Override
    public boolean hasNext() {
      prepare();
      return queued;
    }

    @Override
    public T next() {
      prepare();
      if (!queued) {
        // NoSuchElementException is what the Iterator contract asks for; it is
        // a RuntimeException, so callers catching the old type still work.
        throw new NoSuchElementException("Filter .next() called with no next");
      }
      T rv = pending;
      pending = null;
      queued = false;
      return rv;
    }

    /** Advances the underlying iterator to the next accepted element, if none is buffered. */
    private void prepare() {
      if (queued) {
        return;
      }
      while (inner.hasNext()) {
        T candidate = inner.next();
        if (accept.apply(candidate)) {
          pending = candidate;
          queued = true;
          return;
        }
      }
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  };
}
/**
* Casts all values in the given Iterable to the given type.
*/
public static Iterable cast(
final Iterable> iterable, final Class extends T> type) {
return new Iterable() {
public Iterator iterator() {
return new Iterator() {
Iterator> inner = iterable.iterator();
public boolean hasNext() {
return inner.hasNext();
}
public T next() {
return type.cast(inner.next());
}
public void remove() {
inner.remove();
}
};
}
};
}
/**
 * Returns a shortened view of an array. Returns at most {@code max}
 * elements.
 *
 * @param <T> element type
 * @param array the source elements
 * @param max the maximum number of elements the view yields
 * @return a view over the first {@code max} elements of {@code array}
 */
public static <T> Iterable<T> take(T[] array, int max) {
  return take(Arrays.asList(array), max);
}

/**
 * Returns a shortened view of an iterable. Returns at most {@code max}
 * elements.
 *
 * <p>Each call to {@code iterator()} on the result requests a fresh
 * iterator from {@code iterable} and starts counting from zero, so the view
 * can be traversed more than once. {@code remove()} is forwarded to the
 * underlying iterator.
 *
 * @param <T> element type
 * @param iterable the source of elements
 * @param max the maximum number of elements the view yields; a value of
 *     zero or less gives an empty view
 * @return a view over the first {@code max} elements of {@code iterable}
 */
public static <T> Iterable<T> take(
    final Iterable<T> iterable, final int max) {
  return () -> new Iterator<T>() {
    // Created per traversal: sharing one underlying iterator across
    // traversals would make a second pass resume where the first stopped.
    private final Iterator<T> inner = iterable.iterator();
    private int taken = 0;

    @Override
    public boolean hasNext() {
      return taken < max && inner.hasNext();
    }

    @Override
    public T next() {
      if (taken >= max) {
        // Never hand out elements beyond the limit, even if the caller
        // skips hasNext().
        throw new NoSuchElementException("take: limit of " + max + " elements reached");
      }
      T value = inner.next();
      taken++;
      return value;
    }

    @Override
    public void remove() {
      inner.remove();
    }
  };
}
/**
 * Returns a view of the given array, ignoring the first {@code toDrop}
 * elements.
 *
 * @param <T> element type
 * @param array the source elements
 * @param toDrop the number of leading elements to skip
 * @return a view over the remaining elements
 */
public static <T> Iterable<T> drop(T[] array, int toDrop) {
  return drop(Arrays.asList(array), toDrop);
}

/**
 * Returns a view of the given data, ignoring the first {@code toDrop}
 * elements.
 *
 * <p>Each call to {@code iterator()} on the result requests a fresh
 * iterator from {@code iterable}, so the view can be traversed more than
 * once. The leading elements are skipped lazily, on the first call to
 * {@code hasNext()} or {@code next()}. If the source has {@code toDrop} or
 * fewer elements the view is empty.
 *
 * @param <T> element type
 * @param iterable the source of elements
 * @param toDrop the number of leading elements to skip
 * @return a view over the remaining elements
 */
public static <T> Iterable<T> drop(
    final Iterable<T> iterable, final int toDrop) {
  return () -> new Iterator<T>() {
    // Created per traversal so that repeated traversals each skip the prefix.
    private final Iterator<T> inner = iterable.iterator();
    private int skipped = 0;
    /** True only between a successful next() and the following remove(). */
    private boolean canRemove = false;

    /** Consumes the not-yet-skipped part of the prefix from the underlying iterator. */
    private void skipPrefix() {
      while (skipped < toDrop && inner.hasNext()) {
        inner.next();
        skipped++;
      }
    }

    @Override
    public boolean hasNext() {
      skipPrefix();
      return inner.hasNext();
    }

    @Override
    public T next() {
      skipPrefix();
      T value = inner.next();
      canRemove = true;
      return value;
    }

    @Override
    public void remove() {
      // Without this guard, remove() right after hasNext() would delete the
      // last *dropped* element, because skipping calls inner.next().
      if (!canRemove) {
        throw new IllegalStateException("remove() must follow a call to next()");
      }
      inner.remove();
      canRemove = false;
    }
  };
}
/**
* Chains together an Iterable of Iterables after transforming each one.
* Equivalent to Iterables.transform(Iterables.chain(iterables),trans);
*/
public static Iterable flatMap(final Iterable extends Iterable> iterables, Function super T,U> trans) {
return transform(chain(iterables),trans);
}
/**
* Chains together a set of Iterables of compatible types. Returns all
* elements of the first iterable, then all of the second, then the third,
* etc.
*/
public static Iterable chain(final Iterable extends Iterable> iterables) {
return new Iterable() {
public Iterator iterator() {
final Iterator extends Iterable> iterators = iterables.iterator();
return new Iterator() {
private Iterator current = null;
public boolean hasNext() {
// advance current iterator if necessary, return false at end
while (current == null || !current.hasNext()) {
if (iterators.hasNext()) {
current = iterators.next().iterator();
} else {
return false;
}
}
return true;
}
public T next() {
return current.next();
}
public void remove() {
current.remove();
}
};
}
};
}
/**
* Chains together all Iterables of type T as given in an array or
* varargs parameter.
*/
public static Iterable chain(final Iterable ... iterables) {
return chain(Arrays.asList(iterables));
}
/**
* Chains together all arrays of type T[] as given in an array or
* varargs parameter.
*/
public static Iterable chain(final T[] ... arrays) {
LinkedList> iterables = new LinkedList<>();
for (T[] array : arrays) {
iterables.add(Arrays.asList(array));
}
return chain(iterables);
}
/**
* Zips two iterables into one iterable over Pairs of corresponding
* elements in the two underlying iterables. Ends when the shorter
* iterable ends.
*/
public static Iterable> zip(
final Iterable iter1, final Iterable iter2) {
return new Iterable>() {
public Iterator> iterator() {
return zip(iter1.iterator(), iter2.iterator());
}
};
}
/**
* Zips two iterables into one iterable over Pairs of corresponding
* elements in the two underlying iterables. Ends when the shorter
* iterable ends.
*/
public static Iterable> zip(
Iterable iter, T2 array[]) {
return zip(iter, Arrays.asList(array));
}
/**
* Zips two iterables into one iterable over Pairs of corresponding
* elements in the two underlying iterables. Ends when the shorter
* iterable ends.
*/
public static Iterable> zip(
T1 array[], Iterable iter) {
return zip(Arrays.asList(array), iter);
}
/**
* Zips two iterables into one iterable over Pairs of corresponding
* elements in the two underlying iterables. Ends when the shorter
* iterable ends.
*/
public static Iterable> zip(
T1 array1[], T2 array2[]) {
return zip(Arrays.asList(array1), Arrays.asList(array2));
}
/**
* Zips up two iterators into one iterator over Pairs of corresponding
* elements. Ends when the shorter iterator ends.
*/
public static Iterator> zip(
final Iterator iter1, final Iterator iter2) {
return new Iterator>() {
public boolean hasNext() {
return iter1.hasNext() && iter2.hasNext();
}
public Pair next() {
return new Pair<>(iter1.next(), iter2.next());
}
public void remove() {
iter1.remove();
iter2.remove();
}
};
}
/**
* A comparator used by the merge functions to determine which of two
* iterators to increment by one of the merge functions.
*
* @param Type of first iterator
* @param Type of second iterator
*/
public interface IncrementComparator {
/**
* Returns -1 if the value of a should come before the value of b,
* +1 if the value of b should come before the value of a, or 0 if
* the two should be merged together.
*/
public int compare(V1 a, V2 b);
}
/**
* Iterates over pairs of objects from two (sorted) iterators such that
* each pair a \in iter1, b \in iter2 returned has comparator.compare(a,b)==0.
* If the comparator says that a and b are not equal, we increment the
* iterator of the smaller value. If the comparator says that a and b are
* equal, we return that pair and increment both iterators.
*
* This is used, e.g. to return lines from two input files that have
* the same "key" as determined by the given comparator.
*
* The comparator will always be passed elements from the first iter as
* the first argument.
*/
public static Iterable> merge(
final Iterable iter1, final Iterable iter2,
final IncrementComparator comparator) {
return new Iterable>() {
Iterator iterA = iter1.iterator();
Iterator iterB = iter2.iterator();
public Iterator> iterator() {
return new Iterator>() {
boolean ready = false;
Pair pending = null;
public boolean hasNext() {
if (!ready) {
pending = nextPair();
ready = true;
}
return pending != null;
}
public Pair next() {
if (!ready && !hasNext()) {
throw new IllegalAccessError("Called next without hasNext");
}
ready = false;
return pending;
}
public void remove() {
throw new UnsupportedOperationException("Cannot remove pairs " +
"from a merged iterator");
}
private Pair nextPair() {
V1 nextA = null;
V2 nextB = null;
while (iterA.hasNext() && iterB.hasNext()) {
// increment iterators are null
if (nextA == null) { nextA = iterA.next(); }
if (nextB == null) { nextB = iterB.next(); }
int cmp = comparator.compare(nextA, nextB);
if (cmp < 0) {
// iterA too small, increment it next time around
nextA = null;
} else if (cmp > 0) {
// iterB too small, increment it next time around
nextB = null;
} else {
// just right - return this pair
return new Pair<>(nextA, nextB);
}
}
return null;
}
};
}
};
}
/**
* Same as {@link #merge(Iterable, Iterable, IncrementComparator)} but using
* the given (symmetric) comparator.
*/
public static Iterable> merge(
final Iterable iter1, final Iterable iter2,
final Comparator comparator) {
final IncrementComparator inc = (a, b) -> comparator.compare(a,b);
return merge(iter1, iter2, inc);
}
/**
* Iterates over triples of objects from three (sorted) iterators such that
* for every returned triple a (from iter1), b (from iter2), c (from iter3)
* satisfies the constraint that comparator.compare(a,b) ==
* comparator.compare(a,c) == 0
. Internally, this function first
* calls merge(iter1,iter2,comparatorA), and then merges that iterator
* with the iter3 by comparing based on the value returned by iter1.
*
* This is used, e.g. to return lines from three input files that have
* the same "key" as determined by the given comparator.
*/
public static Iterable> merge(
final Iterable iter1, final Iterable iter2, final Iterable iter3,
final IncrementComparator comparatorA,
final IncrementComparator comparatorB) {
// partial merge on first two iterables
Iterable> partial = merge(iter1, iter2, comparatorA);
IncrementComparator,V3> inc =
new IncrementComparator,V3>() {
public int compare(Pair a, V3 b) {
return comparatorB.compare(a.first, b);
}
};
// flattens the pairs into triple
Function, V3>, Triple> flatten =
in -> new Triple<>(in.first.first, in.first.second, in.second);
return transform(merge(partial, iter3, inc), flatten);
}
/**
* Same as {@link #merge(Iterable, Iterable, Iterable, IncrementComparator, IncrementComparator)}
* but using the given (symmetric) comparator.
*/
public static Iterable> merge(
final Iterable iter1, final Iterable iter2, Iterable iter3,
final Comparator comparator) {
final IncrementComparator inc = (a, b) -> comparator.compare(a,b);
return merge(iter1, iter2, iter3, inc, inc);
}
/**
* Groups consecutive elements from the given iterable based on the value
* in the given comparator. Each inner iterable will iterate over consecutive
* items from the input until the comparator says that the next item is not
* equal to the previous.
*/
public static Iterable> group(final Iterable iterable,
final Comparator comparator) {
return new Iterable>() {
public Iterator> iterator() {
return new Iterator>() {
/** Actual iterator */
Iterator it = iterable.iterator();
/** Next element to return */
V next;
public boolean hasNext() {
return next != null || it.hasNext();
}
public Iterable next() {
return () -> new Iterator() {
V last = null;
public boolean hasNext() {
// get next if we need to and one is available
if (next == null && it.hasNext()) {
next = it.next();
}
// if next and last both have values, compare them
if (last != null && next != null) {
return comparator.compare(last, next) == 0;
}
// one of them was not null - have more if it was next
return next != null;
}
public V next() {
if (!hasNext()) {
throw new IllegalStateException("Didn't have next");
}
V rv = next;
last = next;
next = null;
return rv;
}
public void remove() {
throw new UnsupportedOperationException();
}
};
}
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
/**
 * Returns a string representation of the contents of calling toString
 * on each element of the given iterable, joining the elements together
 * with the given glue.
 *
 * @param <E> element type
 * @param iter the elements to render, in iteration order
 * @param glue the separator placed between consecutive elements
 * @return the joined string; empty if {@code iter} has no elements
 */
public static <E> String toString(Iterable<E> iter, String glue) {
  StringBuilder builder = new StringBuilder();
  Iterator<E> it = iter.iterator();
  if (it.hasNext()) {
    builder.append(it.next());
    // every further element is preceded by the glue
    while (it.hasNext()) {
      builder.append(glue).append(it.next());
    }
  }
  return builder.toString();
}
/**
 * Sample k items uniformly from an Iterable of size n (without replacement).
 *
 * <p>The set of selected positions is chosen once, when this method is
 * called; every traversal of the result yields the items at those same
 * positions, in their original order.
 *
 * @param <T> element type
 * @param items The items from which to sample.
 * @param n The total number of items in the Iterable.
 * @param k The number of items to sample; must not exceed n, since the
 *     first k of n shuffled indexes are taken.
 * @param random The random number generator.
 * @return An Iterable of k items, chosen randomly from the original n items.
 */
public static <T> Iterable<T> sample(Iterable<T> items, int n, int k, Random random) {
  // assemble a list of all indexes
  List<Integer> indexes = new ArrayList<>();
  for (int i = 0; i < n; ++i) {
    indexes.add(i);
  }

  // shuffle the indexes and select the first k
  Collections.shuffle(indexes, random);
  final Set<Integer> indexSet = Generics.newHashSet(indexes.subList(0, k));

  // filter down to only the items at the selected indexes; the counting
  // predicate is created per traversal so that each pass starts at index 0
  return () -> Iterables.filter(items, new Function<T, Boolean>() {
    private int index = -1;

    @Override
    public Boolean apply(T item) {
      ++this.index;
      return indexSet.contains(this.index);
    }
  }).iterator();
}
// /**
// * Returns a dummy collection wrapper for the Iterable that iterates
// * it once to get the size if requested. If the underlying iterable
// * cannot be iterated more than once, you're out of luck.
// */
// public static Collection toCollection(final Iterable iter) {
// return new AbstractCollection() {
// int size = -1;
//
// @Override
// public Iterator iterator() {
// return iter.iterator();
// }
//
// @Override
// public int size() {
// if (size < 0) {
// size = 0;
// for (E elem : iter) { size++; }
// }
// return size;
// }
// };
// }
//
// public static > L toList(Iterable iter, Class type) {
// try {
// type.newInstance();
// } catch (InstantiationException e) {
// e.printStackTrace();
// } catch (IllegalAccessException e) {
// e.printStackTrace();
// }
// }
/**
* Creates an ArrayList containing all of the Objects returned by the given Iterator.
*/
public static ArrayList asArrayList(Iterator extends T> iter) {
ArrayList al = new ArrayList<>();
return (ArrayList) addAll(iter, al);
}
/**
* Creates a HashSet containing all of the Objects returned by the given Iterator.
*/
public static HashSet asHashSet(Iterator extends T> iter) {
HashSet hs = new HashSet<>();
return (HashSet) addAll(iter, hs);
}
/**
* Creates a new Collection from the given CollectionFactory, and adds all of the Objects
* returned by the given Iterator.
*/
public static Collection asCollection(Iterator extends E> iter, CollectionFactory cf) {
Collection c = cf.newCollection();
return addAll(iter, c);
}
/**
* Adds all of the Objects returned by the given Iterator into the given Collection.
*
* @return the given Collection
*/
public static Collection addAll(Iterator extends T> iter, Collection c) {
while (iter.hasNext()) {
c.add(iter.next());
}
return c;
}
/**
 * For internal debugging purposes only. Prints the results of the
 * collection-building helpers applied to a small fixed list.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  String[] test = {"a", "b", "c"};
  List<String> l = Arrays.asList(test);

  System.out.println(asArrayList(l.iterator()));
  System.out.println(asHashSet(l.iterator()));
  System.out.println(asCollection(l.iterator(), CollectionFactory.hashSetFactory()));

  // addAll appends to an existing collection and returns that same collection
  ArrayList<String> al = new ArrayList<>();
  al.add("d");
  System.out.println(addAll(l.iterator(), al));
}
}