All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.util.Iterables Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;

/**
 * Utilities for helping out with Iterables as Collections is to Collection.
 *
 * NB: Some Iterables returned by methods in this class return Iterators that
 * assume a call to hasNext will precede each call to next.  While this usage
 * is not up to the Java Iterator spec, it should work fine with
 * e.g. the Java enhanced for-loop.
 * 
 * 

* * Methods in Iterators are merged. * * @author dramage * @author dlwh {@link #flatMap(Iterable, Function)} * @author Huy Nguyen ([email protected]) * */ public class Iterables { /** * Transformed view of the given iterable. Returns the output * of the given function when applied to each element of the * iterable. */ public static Iterable transform( final Iterable iterable, final Function function) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); public boolean hasNext() { return inner.hasNext(); } public V next() { return function.apply(inner.next()); } public void remove() { inner.remove(); } }; } }; } /** * Filtered view of the given iterable. Returns only those elements * from the iterable for which the given Function returns true. */ public static Iterable filter( final Iterable iterable, final Function accept) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); boolean queued = false; T next = null; public boolean hasNext() { prepare(); return queued; } public T next() { prepare(); if (!queued) { throw new RuntimeException("Filter .next() called with no next"); } T rv = next; next = null; queued = false; return rv; } public void prepare() { if (queued) { return; } while (inner.hasNext()) { T next = inner.next(); if (accept.apply(next)) { this.next = next; this.queued = true; return; } } } public void remove() { throw new UnsupportedOperationException(); } }; } }; } /** * Casts all values in the given Iterable to the given type. */ public static Iterable cast( final Iterable iterable, final Class type) { return new Iterable() { public Iterator iterator() { return new Iterator() { Iterator inner = iterable.iterator(); public boolean hasNext() { return inner.hasNext(); } public T next() { return type.cast(inner.next()); } public void remove() { inner.remove(); } }; } }; } /** * Returns a shortened view of an iterator. 
Returns at most max elements. */ public static Iterable take(T[] array, int max) { return take(Arrays.asList(array),max); } /** * Returns a shortened view of an iterator. Returns at most max elements. */ public static Iterable take( final Iterable iterable, final int max) { return new Iterable() { final Iterator iterator = iterable.iterator(); // @Override public Iterator iterator() { return new Iterator() { int i = 0; // @Override public boolean hasNext() { return i < max && iterator.hasNext(); } // @Override public T next() { i++; return iterator.next(); } // @Override public void remove() { iterator.remove(); } }; } }; } /** * Returns a view of the given data, ignoring the first toDrop elements. */ public static Iterable drop(T[] array, int toDrop) { return drop(Arrays.asList(array),toDrop); } /** * Returns a view of the given data, ignoring the first toDrop elements. */ public static Iterable drop( final Iterable iterable, final int toDrop) { return new Iterable() { final Iterator iterator = iterable.iterator(); // @Override public Iterator iterator() { return new Iterator() { int skipped = 0; // @Override public boolean hasNext() { while (skipped < toDrop && iterator.hasNext()) { iterator.next(); skipped += 1; } return iterator.hasNext(); } // @Override public T next() { while (skipped < toDrop && iterator.hasNext()) { iterator.next(); skipped += 1; } return iterator.next(); } // @Override public void remove() { iterator.remove(); } }; } }; } /** * Chains together an Iterable of Iterables after transforming each one. * Equivalent to Iterables.transform(Iterables.chain(iterables),trans); */ public static Iterable flatMap(final Iterable> iterables, Function trans) { return transform(chain(iterables),trans); } /** * Chains together a set of Iterables of compatible types. Returns all * elements of the first iterable, then all of the second, then the third, * etc. 
*/ public static Iterable chain(final Iterable> iterables) { return new Iterable() { public Iterator iterator() { final Iterator> iterators = iterables.iterator(); return new Iterator() { private Iterator current = null; public boolean hasNext() { // advance current iterator if necessary, return false at end while (current == null || !current.hasNext()) { if (iterators.hasNext()) { current = iterators.next().iterator(); } else { return false; } } return true; } public T next() { return current.next(); } public void remove() { current.remove(); } }; } }; } /** * Chains together all Iterables of type T as given in an array or * varargs parameter. */ public static Iterable chain(final Iterable ... iterables) { return chain(Arrays.asList(iterables)); } /** * Chains together all arrays of type T[] as given in an array or * varargs parameter. */ public static Iterable chain(final T[] ... arrays) { LinkedList> iterables = new LinkedList<>(); for (T[] array : arrays) { iterables.add(Arrays.asList(array)); } return chain(iterables); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( final Iterable iter1, final Iterable iter2) { return new Iterable>() { public Iterator> iterator() { return zip(iter1.iterator(), iter2.iterator()); } }; } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( Iterable iter, T2 array[]) { return zip(iter, Arrays.asList(array)); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. 
*/ public static Iterable> zip( T1 array[], Iterable iter) { return zip(Arrays.asList(array), iter); } /** * Zips two iterables into one iterable over Pairs of corresponding * elements in the two underlying iterables. Ends when the shorter * iterable ends. */ public static Iterable> zip( T1 array1[], T2 array2[]) { return zip(Arrays.asList(array1), Arrays.asList(array2)); } /** * Zips up two iterators into one iterator over Pairs of corresponding * elements. Ends when the shorter iterator ends. */ public static Iterator> zip( final Iterator iter1, final Iterator iter2) { return new Iterator>() { public boolean hasNext() { return iter1.hasNext() && iter2.hasNext(); } public Pair next() { return new Pair<>(iter1.next(), iter2.next()); } public void remove() { iter1.remove(); iter2.remove(); } }; } /** * A comparator used by the merge functions to determine which of two * iterators to increment by one of the merge functions. * * @param Type of first iterator * @param Type of second iterator */ public interface IncrementComparator { /** * Returns -1 if the value of a should come before the value of b, * +1 if the value of b should come before the value of a, or 0 if * the two should be merged together. */ public int compare(V1 a, V2 b); } /** * Iterates over pairs of objects from two (sorted) iterators such that * each pair a \in iter1, b \in iter2 returned has comparator.compare(a,b)==0. * If the comparator says that a and b are not equal, we increment the * iterator of the smaller value. If the comparator says that a and b are * equal, we return that pair and increment both iterators. * * This is used, e.g. to return lines from two input files that have * the same "key" as determined by the given comparator. * * The comparator will always be passed elements from the first iter as * the first argument. 
*/ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final IncrementComparator comparator) { return new Iterable>() { Iterator iterA = iter1.iterator(); Iterator iterB = iter2.iterator(); public Iterator> iterator() { return new Iterator>() { boolean ready = false; Pair pending = null; public boolean hasNext() { if (!ready) { pending = nextPair(); ready = true; } return pending != null; } public Pair next() { if (!ready && !hasNext()) { throw new IllegalAccessError("Called next without hasNext"); } ready = false; return pending; } public void remove() { throw new UnsupportedOperationException("Cannot remove pairs " + "from a merged iterator"); } private Pair nextPair() { V1 nextA = null; V2 nextB = null; while (iterA.hasNext() && iterB.hasNext()) { // increment iterators are null if (nextA == null) { nextA = iterA.next(); } if (nextB == null) { nextB = iterB.next(); } int cmp = comparator.compare(nextA, nextB); if (cmp < 0) { // iterA too small, increment it next time around nextA = null; } else if (cmp > 0) { // iterB too small, increment it next time around nextB = null; } else { // just right - return this pair return new Pair<>(nextA, nextB); } } return null; } }; } }; } /** * Same as {@link #merge(Iterable, Iterable, IncrementComparator)} but using * the given (symmetric) comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final Comparator comparator) { final IncrementComparator inc = (a, b) -> comparator.compare(a,b); return merge(iter1, iter2, inc); } /** * Iterates over triples of objects from three (sorted) iterators such that * for every returned triple a (from iter1), b (from iter2), c (from iter3) * satisfies the constraint that comparator.compare(a,b) == * comparator.compare(a,c) == 0. Internally, this function first * calls merge(iter1,iter2,comparatorA), and then merges that iterator * with the iter3 by comparing based on the value returned by iter1. * * This is used, e.g. 
to return lines from three input files that have * the same "key" as determined by the given comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, final Iterable iter3, final IncrementComparator comparatorA, final IncrementComparator comparatorB) { // partial merge on first two iterables Iterable> partial = merge(iter1, iter2, comparatorA); IncrementComparator,V3> inc = new IncrementComparator,V3>() { public int compare(Pair a, V3 b) { return comparatorB.compare(a.first, b); } }; // flattens the pairs into triple Function, V3>, Triple> flatten = in -> new Triple<>(in.first.first, in.first.second, in.second); return transform(merge(partial, iter3, inc), flatten); } /** * Same as {@link #merge(Iterable, Iterable, Iterable, IncrementComparator, IncrementComparator)} * but using the given (symmetric) comparator. */ public static Iterable> merge( final Iterable iter1, final Iterable iter2, Iterable iter3, final Comparator comparator) { final IncrementComparator inc = (a, b) -> comparator.compare(a,b); return merge(iter1, iter2, iter3, inc, inc); } /** * Groups consecutive elements from the given iterable based on the value * in the given comparator. Each inner iterable will iterate over consecutive * items from the input until the comparator says that the next item is not * equal to the previous. 
*/ public static Iterable> group(final Iterable iterable, final Comparator comparator) { return new Iterable>() { public Iterator> iterator() { return new Iterator>() { /** Actual iterator */ Iterator it = iterable.iterator(); /** Next element to return */ V next; public boolean hasNext() { return next != null || it.hasNext(); } public Iterable next() { return () -> new Iterator() { V last = null; public boolean hasNext() { // get next if we need to and one is available if (next == null && it.hasNext()) { next = it.next(); } // if next and last both have values, compare them if (last != null && next != null) { return comparator.compare(last, next) == 0; } // one of them was not null - have more if it was next return next != null; } public V next() { if (!hasNext()) { throw new IllegalStateException("Didn't have next"); } V rv = next; last = next; next = null; return rv; } public void remove() { throw new UnsupportedOperationException(); } }; } public void remove() { throw new UnsupportedOperationException(); } }; } }; } /** * Returns a string representation of the contents of calling toString * on each element of the given iterable, joining the elements together * with the given glue. */ public static String toString(Iterable iter, String glue) { StringBuilder builder = new StringBuilder(); for (Iterator it = iter.iterator(); it.hasNext(); ) { builder.append(it.next()); if (it.hasNext()) { builder.append(glue); } } return builder.toString(); } /** * Sample k items uniformly from an Iterable of size n (without replacement). * * @param items The items from which to sample. * @param n The total number of items in the Iterable. * @param k The number of items to sample. * @param random The random number generator. * @return An Iterable of k items, chosen randomly from the original n items. 
*/ public static Iterable sample(Iterable items, int n, int k, Random random) { // assemble a list of all indexes List indexes = new ArrayList<>(); for (int i = 0; i < n; ++i) { indexes.add(i); } // shuffle the indexes and select the first k Collections.shuffle(indexes, random); final Set indexSet = Generics.newHashSet(indexes.subList(0, k)); // filter down to only the items at the selected indexes return Iterables.filter(items, new Function() { private int index = -1; public Boolean apply(T item) { ++this.index; return indexSet.contains(this.index); } }); } // /** // * Returns a dummy collection wrapper for the Iterable that iterates // * it once to get the size if requested. If the underlying iterable // * cannot be iterated more than once, you're out of luck. // */ // public static Collection toCollection(final Iterable iter) { // return new AbstractCollection() { // int size = -1; // // @Override // public Iterator iterator() { // return iter.iterator(); // } // // @Override // public int size() { // if (size < 0) { // size = 0; // for (E elem : iter) { size++; } // } // return size; // } // }; // } // // public static > L toList(Iterable iter, Class type) { // try { // type.newInstance(); // } catch (InstantiationException e) { // e.printStackTrace(); // } catch (IllegalAccessException e) { // e.printStackTrace(); // } // } /** * Creates an ArrayList containing all of the Objects returned by the given Iterator. */ public static ArrayList asArrayList(Iterator iter) { ArrayList al = new ArrayList<>(); return (ArrayList) addAll(iter, al); } /** * Creates a HashSet containing all of the Objects returned by the given Iterator. */ public static HashSet asHashSet(Iterator iter) { HashSet hs = new HashSet<>(); return (HashSet) addAll(iter, hs); } /** * Creates a new Collection from the given CollectionFactory, and adds all of the Objects * returned by the given Iterator. 
*/ public static Collection asCollection(Iterator iter, CollectionFactory cf) { Collection c = cf.newCollection(); return addAll(iter, c); } /** * Adds all of the Objects returned by the given Iterator into the given Collection. * * @return the given Collection */ public static Collection addAll(Iterator iter, Collection c) { while (iter.hasNext()) { c.add(iter.next()); } return c; } /** * For internal debugging purposes only. */ public static void main(String[] args) { String[] test = {"a", "b", "c"}; List l = Arrays.asList(test); System.out.println(asArrayList(l.iterator())); System.out.println(asHashSet(l.iterator())); System.out.println(asCollection(l.iterator(), CollectionFactory.hashSetFactory())); ArrayList al = new ArrayList<>(); al.add("d"); System.out.println(addAll(l.iterator(), al)); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy