All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.util.Iterables Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;

/**
 * Utilities for helping out with Iterables as Collections is to Collection.
 *
 * NB: Some Iterables returned by methods in this class return Iterators that
 * assume a call to hasNext will precede each call to next.  While this usage
 * is not up to the Java Iterator spec, it should work fine with
 * e.g. the Java enhanced for-loop.
 * 
 * 

* * Methods in Iterators are merged. * * @author dramage * @author dlwh {@link #flatMap(Iterable, Function)} * @author Huy Nguyen ([email protected]) * */ public class Iterables { /** * Transformed view of the given iterable. Returns the output * of the given function when applied to each element of the * iterable. */ public static Iterable transform( final Iterable iterable, final Function function) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); public boolean hasNext() { return inner.hasNext(); } public V next() { return function.apply(inner.next()); } public void remove() { inner.remove(); } }; } }; } /** * Filtered view of the given iterable. Returns only those elements * from the iterable for which the given Function returns true. */ public static Iterable filter( final Iterable iterable, final Function accept) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); boolean queued = false; T next = null; public boolean hasNext() { prepare(); return queued; } public T next() { prepare(); if (!queued) { throw new RuntimeException("Filter .next() called with no next"); } T rv = next; next = null; queued = false; return rv; } public void prepare() { if (queued) { return; } while (inner.hasNext()) { T next = inner.next(); if (accept.apply(next)) { this.next = next; this.queued = true; return; } } } public void remove() { throw new UnsupportedOperationException(); } }; } }; } /** * Casts all values in the given Iterable to the given type. */ public static Iterable cast( final Iterable iterable, final Class type) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); public boolean hasNext() { return inner.hasNext(); } public T next() { return type.cast(inner.next()); } public void remove() { inner.remove(); } }; } }; } /** * Returns a shortened view of an iterator. 
Returns at most max elements. */ public static Iterable take(T[] array, int max) { return take(Arrays.asList(array),max); } /** * Returns a shortened view of an iterator. Returns at most max elements. */ public static Iterable take( final Iterable iterable, final int max) { return new Iterable() { final Iterator iterator = iterable.iterator(); // @Override public Iterator iterator() { return new Iterator() { int i = 0; // @Override public boolean hasNext() { return i < max && iterator.hasNext(); } // @Override public T next() { i++; return iterator.next(); } // @Override public void remove() { iterator.remove(); } }; } }; } /** * Returns a view of the given data, ignoring the first toDrop elements. */ public static Iterable drop(T[] array, int toDrop) { return drop(Arrays.asList(array),toDrop); } /** * Returns a view of the given data, ignoring the first toDrop elements. */ public static Iterable drop( final Iterable iterable, final int toDrop) { return new Iterable() { final Iterator iterator = iterable.iterator(); // @Override public Iterator iterator() { return new Iterator() { int skipped = 0; // @Override public boolean hasNext() { while (skipped < toDrop && iterator.hasNext()) { iterator.next(); skipped += 1; } return iterator.hasNext(); } // @Override public T next() { while (skipped < toDrop && iterator.hasNext()) { iterator.next(); skipped += 1; } return iterator.next(); } // @Override public void remove() { iterator.remove(); } }; } }; } /** * Chains together an Iterable of Iterables after transforming each one. * Equivalent to Iterables.transform(Iterables.chain(iterables),trans); */ public static Iterable flatMap(final Iterable> iterables, Function trans) { return transform(chain(iterables),trans); } /** * Chains together a set of Iterables of compatible types. Returns all * elements of the first iterable, then all of the second, then the third, * etc. 
*/ public static Iterable chain(final Iterable> iterables) { return new Iterable() { public Iterator iterator() { final Iterator> iterators = iterables.iterator(); return new Iterator() { private Iterator current = null; public boolean hasNext() { // advance current iterator if necessary, return false at end while (current == null || !current.hasNext()) { if (iterators.hasNext()) { current = iterators.next().iterator(); } else { return false; } } return true; } public T next() { return current.next(); } public void remove() { current.remove(); } }; } }; } /** * Chains together all Iterables of type T as given in an array or * varargs parameter. */ public static Iterable chain(final Iterable ... iterables) { return chain(Arrays.asList(iterables)); } /** * Chains together all arrays of type T[] as given in an array or * varargs parameter. */ public static Iterable chain(final T[] ... arrays) { LinkedList> iterables = new LinkedList>(); for (T[] array : arrays) { iterables.add(Arrays.asList(array)); } return chain(iterables); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( final Iterable iter1, final Iterable iter2) { return new Iterable>() { public Iterator> iterator() { return zip(iter1.iterator(), iter2.iterator()); } }; } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( Iterable iter, T2 array[]) { return zip(iter, Arrays.asList(array)); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. 
*/ public static Iterable> zip( T1 array[], Iterable iter) { return zip(Arrays.asList(array), iter); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( T1 array1[], T2 array2[]) { return zip(Arrays.asList(array1), Arrays.asList(array2)); } /** * Zips up two iterators into one iterator over Pairs of corresponding * elements. Ends when the shorter iterator ends. */ public static Iterator> zip( final Iterator iter1, final Iterator iter2) { return new Iterator>() { public boolean hasNext() { return iter1.hasNext() && iter2.hasNext(); } public Pair next() { return new Pair(iter1.next(), iter2.next()); } public void remove() { iter1.remove(); iter2.remove(); } }; } /** * A comparator used by the merge functions to determine which of two * iterators to increment by one of the merge functions. * * @param Type of first iterator * @param Type of second iterator */ public interface IncrementComparator { /** * Returns -1 if the value of a should come before the value of b, * +1 if the value of b should come before the value of a, or 0 if * the two should be merged together. */ public int compare(V1 a, V2 b); } /** * Iterates over pairs of objects from two (sorted) iterators such that * each pair a \in iter1, b \in iter2 returned has comparator.compare(a,b)==0. * If the comparator says that a and b are not equal, we increment the * iterator of the smaller value. If the comparator says that a and b are * equal, we return that pair and increment both iterators. * * This is used, e.g. to return lines from two input files that have * the same "key" as determined by the given comparator. * * The comparator will always be passed elements from the first iter as * the first argument. 
*/ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final IncrementComparator comparator) { return new Iterable>() { Iterator iterA = iter1.iterator(); Iterator iterB = iter2.iterator(); public Iterator> iterator() { return new Iterator>() { boolean ready = false; Pair pending = null; public boolean hasNext() { if (!ready) { pending = nextPair(); ready = true; } return pending != null; } public Pair next() { if (!ready && !hasNext()) { throw new IllegalAccessError("Called next without hasNext"); } ready = false; return pending; } public void remove() { throw new UnsupportedOperationException("Cannot remove pairs " + "from a merged iterator"); } private Pair nextPair() { V1 nextA = null; V2 nextB = null; while (iterA.hasNext() && iterB.hasNext()) { // increment iterators are null if (nextA == null) { nextA = iterA.next(); } if (nextB == null) { nextB = iterB.next(); } int cmp = comparator.compare(nextA, nextB); if (cmp < 0) { // iterA too small, increment it next time around nextA = null; } else if (cmp > 0) { // iterB too small, increment it next time around nextB = null; } else { // just right - return this pair return new Pair(nextA, nextB); } } return null; } }; } }; } /** * Same as {@link #merge(Iterable, Iterable, IncrementComparator)} but using * the given (symmetric) comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final Comparator comparator) { final IncrementComparator inc = (a, b) -> comparator.compare(a,b); return merge(iter1, iter2, inc); } /** * Iterates over triples of objects from three (sorted) iterators such that * for every returned triple a (from iter1), b (from iter2), c (from iter3) * satisfies the constraint that comparator.compare(a,b) == * comparator.compare(a,c) == 0. Internally, this function first * calls merge(iter1,iter2,comparatorA), and then merges that iterator * with the iter3 by comparing based on the value returned by iter1. * * This is used, e.g. 
to return lines from three input files that have * the same "key" as determined by the given comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final Iterable iter3, final IncrementComparator comparatorA, final IncrementComparator comparatorB) { // partial merge on first two iterables Iterable> partial = merge(iter1, iter2, comparatorA); IncrementComparator,V3> inc = new IncrementComparator,V3>() { public int compare(Pair a, V3 b) { return comparatorB.compare(a.first, b); } }; // flattens the pairs into triple Function, V3>, Triple> flatten = in -> new Triple(in.first.first,in.first.second,in.second); return transform(merge(partial, iter3, inc), flatten); } /** * Same as {@link #merge(Iterable, Iterable, Iterable, IncrementComparator, IncrementComparator)} * but using the given (symmetric) comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, Iterable iter3, final Comparator comparator) { final IncrementComparator inc = (a, b) -> comparator.compare(a,b); return merge(iter1, iter2, iter3, inc, inc); } /** * Groups consecutive elements from the given iterable based on the value * in the given comparator. Each inner iterable will iterate over consecutive * items from the input until the comparator says that the next item is not * equal to the previous. 
*/ public static Iterable> group(final Iterable iterable, final Comparator comparator) { return new Iterable>() { public Iterator> iterator() { return new Iterator>() { /** Actual iterator */ Iterator it = iterable.iterator(); /** Next element to return */ V next; public boolean hasNext() { return next != null || it.hasNext(); } public Iterable next() { return () -> new Iterator() { V last = null; public boolean hasNext() { // get next if we need to and one is available if (next == null && it.hasNext()) { next = it.next(); } // if next and last both have values, compare them if (last != null && next != null) { return comparator.compare(last, next) == 0; } // one of them was not null - have more if it was next return next != null; } public V next() { if (!hasNext()) { throw new IllegalStateException("Didn't have next"); } V rv = next; last = next; next = null; return rv; } public void remove() { throw new UnsupportedOperationException(); } }; } public void remove() { throw new UnsupportedOperationException(); } }; } }; } /** * Returns a string representation of the contents of calling toString * on each element of the given iterable, joining the elements together * with the given glue. */ public static String toString(Iterable iter, String glue) { StringBuilder builder = new StringBuilder(); for (Iterator it = iter.iterator(); it.hasNext(); ) { builder.append(it.next()); if (it.hasNext()) { builder.append(glue); } } return builder.toString(); } /** * Sample k items uniformly from an Iterable of size n (without replacement). * * @param items The items from which to sample. * @param n The total number of items in the Iterable. * @param k The number of items to sample. * @param random The random number generator. * @return An Iterable of k items, chosen randomly from the original n items. 
*/ public static Iterable sample(Iterable items, int n, int k, Random random) { // assemble a list of all indexes List indexes = new ArrayList(); for (int i = 0; i < n; ++i) { indexes.add(i); } // shuffle the indexes and select the first k Collections.shuffle(indexes, random); final Set indexSet = Generics.newHashSet(indexes.subList(0, k)); // filter down to only the items at the selected indexes return Iterables.filter(items, new Function() { private int index = -1; public Boolean apply(T item) { ++this.index; return indexSet.contains(this.index); } }); } // /** // * Returns a dummy collection wrapper for the Iterable that iterates // * it once to get the size if requested. If the underlying iterable // * cannot be iterated more than once, you're out of luck. // */ // public static Collection toCollection(final Iterable iter) { // return new AbstractCollection() { // int size = -1; // // @Override // public Iterator iterator() { // return iter.iterator(); // } // // @Override // public int size() { // if (size < 0) { // size = 0; // for (E elem : iter) { size++; } // } // return size; // } // }; // } // // public static > L toList(Iterable iter, Class type) { // try { // type.newInstance(); // } catch (InstantiationException e) { // e.printStackTrace(); // } catch (IllegalAccessException e) { // e.printStackTrace(); // } // } /** * Creates an ArrayList containing all of the Objects returned by the given Iterator. */ public static ArrayList asArrayList(Iterator iter) { ArrayList al = new ArrayList(); return (ArrayList) addAll(iter, al); } /** * Creates a HashSet containing all of the Objects returned by the given Iterator. */ public static HashSet asHashSet(Iterator iter) { HashSet hs = new HashSet(); return (HashSet) addAll(iter, hs); } /** * Creates a new Collection from the given CollectionFactory, and adds all of the Objects * returned by the given Iterator. 
*/ public static Collection asCollection(Iterator iter, CollectionFactory cf) { Collection c = cf.newCollection(); return addAll(iter, c); } /** * Adds all of the Objects returned by the given Iterator into the given Collection. * * @return the given Collection */ public static Collection addAll(Iterator iter, Collection c) { while (iter.hasNext()) { c.add(iter.next()); } return c; } /** * For internal debugging purposes only. */ public static void main(String[] args) { String[] test = {"a", "b", "c"}; List l = Arrays.asList(test); System.out.println(asArrayList(l.iterator())); System.out.println(asHashSet(l.iterator())); System.out.println(asCollection(l.iterator(), CollectionFactory.hashSetFactory())); ArrayList al = new ArrayList(); al.add("d"); System.out.println(addAll(l.iterator(), al)); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy