All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.util.HashIndex Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

The newest version!
package edu.stanford.nlp.util;

import java.io.*;
import java.util.*;
import java.util.concurrent.Semaphore;
import java.util.function.Supplier;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;

/**
 * Implements an Index that supports constant-time lookup in
 * both directions (via {@code get(int)} and {@code indexOf(E)}.
 * The {@code indexOf(E)} method compares objects by
 * {@code equals()}, as other Collections.
 * 

* The typical usage would be: *

{@code Index index = new Index(collection);} *

followed by *

{@code int i = index.indexOf(str);} *

or *

{@code String s = index.get(i);} *

An Index can be locked or unlocked: a locked index cannot have new * items added to it. * * @author Dan Klein * @version 1.0 * @see AbstractCollection * @since 1.0 * @author Eric Yeh (added write to/load from buffer) */ // todo [cdm 2014]: Delete "extends AbstractCollection" but this will break serialization.... public class HashIndex extends AbstractCollection implements Index, RandomAccess { // these variables are also used in IntArrayIndex private final List objects; // <-- Should really almost always be an ArrayList private final Map indexes; private boolean locked; // = false; // Mutable private static final long serialVersionUID = 5398562825928375260L; /** * Clears this Index. */ @Override public void clear() { objects.clear(); indexes.clear(); } /** * Returns the index of each elem in a List. * @param elements The list of items * @return An array of indices */ public int[] indices(Collection elements) { int[] indices = new int[elements.size()]; int i = 0; for (E elem : elements) { indices[i++] = indexOf(elem); } return indices; } /** * Looks up the objects corresponding to an array of indices, and returns them in a {@link Collection}. * This collection is not a copy, but accesses the data structures of the Index. * * @param indices An array of indices * @return a {@link Collection} of the objects corresponding to the indices argument. */ @Override public Collection objects(final int[] indices) { return new AbstractList() { @Override public E get(int index) { return objects.get(indices[index]); } @Override public int size() { return indices.length; } }; } /** * Returns the number of indexed objects. * * @return the number of indexed objects. */ @Override public int size() { return objects.size(); } /** * Gets the object whose index is the integer argument. * * @param i the integer index to be queried for the corresponding argument * @return the object whose index is the integer argument. */ @Override public E get(int i) { if (i < 0 || i >= objects.size()) throw new ArrayIndexOutOfBoundsException("Index " + i + " outside the bounds [0," + size() + ")"); return objects.get(i); } /** * Returns a complete {@link List} of indexed objects, in the order of their indices. DANGER! * The current implementation returns the actual index list, not a defensive copy. Messing with this List * can seriously screw up the state of the Index. (perhaps this method needs to be eliminated? I don't think it's * ever used in ways that we couldn't use the Index itself for directly. --Roger, 12/29/04) * * @return a complete {@link List} of indexed objects */ @Override public List objectsList() { return objects; } /** * Queries the Index for whether it's locked or not. * @return whether or not the Index is locked */ @Override public boolean isLocked() { return locked; } /** Locks the Index. A locked index cannot have new elements added to it (calls to {@link #add} will * leave the Index unchanged and return {@code false}).*/ @Override public void lock() { locked = true; } /** Unlocks the Index. A locked index cannot have new elements added to it (calls to {@link #add} will * leave the Index unchanged and return {@code false}).*/ @Override public void unlock() { locked = false; } /** {@inheritDoc} */ @Override public int indexOf(E o) { Integer index = indexes.get(o); if (index == null) { return -1; } return index; } @Override public int addToIndex(E o) { Integer index = indexes.get(o); if (index == null) { if ( ! locked) { try { semaphore.acquire(); index = indexes.get(o); if (index == null) { index = objects.size(); objects.add(o); indexes.put(o, index); } semaphore.release(); } catch (InterruptedException e) { throw new RuntimeInterruptedException(e); } } else { return -1; } } return index; } /** * Add the given item to the index, but without taking any locks. * Use this method with care! * But, this offers a noticable performance improvement if it is safe to use. * * @see Index#addToIndex(E) */ public int addToIndexUnsafe(E o) { if (indexes.isEmpty()) { // a surprisingly common case in TokensRegex objects.add(o); indexes.put(o, 0); return 0; } else { Integer index = indexes.get(o); if (index == null) { if (locked) { index = -1; } else { index = objects.size(); objects.add(o); indexes.put(o, index); } } return index; } } /** * Takes an Object and returns the integer index of the Object, * perhaps adding it to the index first. * Returns -1 if the Object is not in the Index. *

* Notes: The method indexOf(x, true) is the direct replacement for * the number(x) method in the old Numberer class. This method now uses a * Semaphore object to make the index safe for concurrent multithreaded * usage. (CDM: Is this better than using a synchronized block?) * * @param o the Object whose index is desired. * @param add Whether it is okay to add new items to the index * @return The index of the Object argument. Returns -1 if the object is not in the index. */ @Override @Deprecated public int indexOf(E o, boolean add) { if (add) { return addToIndex(o); } else { return indexOf(o); } } private final Semaphore semaphore = new Semaphore(1); // TODO: delete this when breaking serialization because we can leach off of AbstractCollection /** * Adds every member of Collection to the Index. Does nothing for members already in the Index. * * @return true if some item was added to the index and false if no * item was already in the index or if the index is locked */ @Override public boolean addAll(Collection c) { boolean changed = false; for (E element: c) { changed |= add(element); //changed &= add(element); } return changed; } /** * Adds an object to the Index. If it was already in the Index, * then nothing is done. If it is not in the Index, then it is * added iff the Index hasn't been locked. * * @return true if the item was added to the index and false if the * item was already in the index or if the index is locked */ @Override public boolean add(E o) { Integer index = indexes.get(o); if (index == null && ! locked) { index = objects.size(); objects.add(o); indexes.put(o, index); return true; } return false; } /** * Checks whether an Object already has an index in the Index * @param o the object to be queried. * @return true iff there is an index for the queried object. */ @SuppressWarnings({"SuspiciousMethodCalls"}) @Override public boolean contains(Object o) { return indexes.containsKey(o); } /** * Creates a new Index. */ public HashIndex() { super(); objects = new ArrayList<>(); indexes = Generics.newHashMap(); } /** * Creates a new Index. * @param capacity Initial capacity of Index. */ public HashIndex(int capacity) { super(); objects = new ArrayList<>(capacity); indexes = Generics.newHashMap(capacity); } /** * Create a new {@code HashIndex}, backed by the given collection types. * @param objLookupFactory The constructor for the object lookup -- traditionally an {@link ArrayList}. * @param indexLookupFactory The constructor for the index lookup -- traditionally a {@link HashMap}. */ public HashIndex(Supplier> objLookupFactory, Supplier> indexLookupFactory) { this(objLookupFactory.get(), indexLookupFactory.get()); } /** Private constructor for supporting the unmodifiable view. */ private HashIndex(List objects, Map indexes) { super(); this.objects = objects; this.indexes = indexes; } /** * Creates a new Index and adds every member of c to it. * @param c A collection of objects */ public HashIndex(Collection c) { this(); addAll(c); } public HashIndex(Index index) { this(); // TODO: this assumes that no index supports deletion addAll(index.objectsList()); } @Override public void saveToFilename(String file) { try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { for (int i = 0, sz = size(); i < sz; i++) { bw.write(i + "=" + get(i) + '\n'); } } catch (IOException e) { throw new RuntimeIOException(e); } // give up } /** * This assumes each line is of the form (number=value) and it adds each value in order of the lines in the file. * Warning: This ignores the value of number, and just indexes each value it encounters in turn! * * @param file Which file to load * @return An index built out of the lines in the file */ public static Index loadFromFilename(String file) { Index index = new HashIndex<>(); try (BufferedReader br = IOUtils.readerFromString(file)) { for (String line; (line = br.readLine()) != null; ) { int start = line.indexOf('='); if (start == -1 || start == line.length() - 1) { continue; } index.add(line.substring(start + 1)); } } catch (IOException e) { throw new RuntimeIOException(e); } return index; } /** * This saves the contents of this index into string form, as part of a larger * text-serialization. This is not intended to act as a standalone routine, * instead being called from the text-serialization routine for a component * that makes use of an Index, so everything can be stored in one file. This is * similar to {@code saveToFileName}. * @param bw Writer to save to. * @throws IOException Exception thrown if cannot save. */ @Override public void saveToWriter(Writer bw) throws IOException { for (int i = 0, sz = size(); i < sz; i++) { bw.write(i + "=" + get(i) + '\n'); } } /** * This is the analogue of {@code loadFromFilename}, and is intended to be included in a routine * that unpacks a text-serialized form of an object that incorporates an Index. * NOTE: presumes that the next readLine() will read in the first line of the * portion of the text file representing the saved Index. Currently reads until it * encounters a blank line, consuming that line and returning the Index. * TODO: figure out how best to terminate: currently a blank line is considered to be a terminator. * @param br The Reader to read the index from * @return An Index read from a file */ public static Index loadFromReader(BufferedReader br) throws IOException { HashIndex index = new HashIndex<>(); String line = br.readLine(); // terminate if EOF reached, or if a blank line is encountered. while ((line != null) && (line.length() > 0)) { int start = line.indexOf('='); if (start == -1 || start == line.length() - 1) { continue; } index.add(line.substring(start + 1)); line = br.readLine(); } return index; } /** Returns a readable version of the Index contents * * @return A String showing the full index contents */ @Override public String toString() { return toString(Integer.MAX_VALUE); } public String toStringOneEntryPerLine() { return toStringOneEntryPerLine(Integer.MAX_VALUE); } /** Returns a readable version of at least part of the Index contents. * * @param n Show the first n items in the Index * @return A String showing some of the index contents */ public String toString(int n) { StringBuilder buff = new StringBuilder("["); int sz = objects.size(); if (n > sz) { n = sz; } int i; for (i = 0; i < n; i++) { E e = objects.get(i); buff.append(i).append('=').append(e); if (i < (sz-1)) buff.append(','); } if (i < sz) buff.append("..."); buff.append(']'); return buff.toString(); } public String toStringOneEntryPerLine(int n) { StringBuilder buff = new StringBuilder(); int sz = objects.size(); if (n > sz) { n = sz; } int i; for (i = 0; i < n; i++) { E e = objects.get(i); buff.append(e); if (i < (sz-1)) buff.append('\n'); } if (i < sz) buff.append("..."); return buff.toString(); } /** * Returns an iterator over the elements of the collection. * @return An iterator over the objects indexed */ @Override public Iterator iterator() { return objects.iterator(); } /** * Returns an unmodifiable view of the Index. It is just * a locked index that cannot be unlocked, so if you * try to add something, nothing will happen (it won't throw * an exception). Trying to unlock it will throw an * UnsupportedOperationException. If the * underlying Index is modified, the change will * "write-through" to the view. * * @return An unmodifiable view of the Index */ public HashIndex unmodifiableView() { HashIndex newIndex = new HashIndex(objects, indexes) { @Override public void unlock() { throw new UnsupportedOperationException("This is an unmodifiable view!"); } private static final long serialVersionUID = 3415903369787491736L; }; newIndex.lock(); return newIndex; } /** * This assumes each line is one value and creates index by adding values in the order of the lines in the file * @param file Which file to load * @return An index built out of the lines in the file */ public static Index loadFromFileWithList(String file) { Index index = new HashIndex<>(); try (BufferedReader br = new BufferedReader(new FileReader(file))) { for (String line; (line = br.readLine()) != null; ) { index.add(line.trim()); } } catch (Exception e) { throw new RuntimeIOException(e); } // forget it return index; } @Override public boolean equals(Object o) { if (this == o) return true; // TODO: why not allow equality to non-HashIndex indices? if (!(o instanceof HashIndex)) return false; HashIndex hashIndex = (HashIndex) o; return indexes.equals(hashIndex.indexes) && objects.equals(hashIndex.objects); } @Override public int hashCode() { int result = objects.hashCode(); result = 31 * result + indexes.hashCode(); return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy