All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ie.machinereading.structure.Span Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ie.machinereading.structure;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;

import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;

/**
 * Stores the offsets for a span of text.
 * Offsets may indicate either token or byte positions.
 * Start is inclusive, end is exclusive.
 * <p>
 * Instances are mutable (see {@link #setStart(int)} and {@link #setEnd(int)}).
 * Iterating over a span yields every integer offset from {@code start}
 * (inclusive) to {@code end} (exclusive).
 *
 * @author Mihai
 */
public class Span implements Serializable, Iterable<Integer> {
  private static final long serialVersionUID = -3861451490217976693L;

  /** Inclusive start offset. */
  private int start;
  /** Exclusive end offset. */
  private int end;

  /** For Kryo serializer */
  @SuppressWarnings("UnusedDeclaration")
  private Span() { }

  /**
   * Creates a span from the given offsets.
   * This assumes that {@code s <= e}.  Use {@link #fromValues(int, int)} if you can't guarantee this.
   *
   * @param s The start offset (inclusive).
   * @param e The end offset (exclusive).
   */
  public Span(int s, int e) {
    start = s;
    end = e;
  }

  /**
   * Creates a span that encloses all spans in the argument list.  Behavior is undefined if given no arguments.
   *
   * @param spans The spans to enclose.
   */
  public Span(Span... spans) {
    // Start from an "inverted" extreme span so the first expansion adopts both bounds of the first argument.
    this(Integer.MAX_VALUE, Integer.MIN_VALUE);

    for (Span span : spans) {
      expandToInclude(span);
    }
  }

  /**
   * Safe way to construct Spans if you're not sure which value is higher.
   *
   * @param val1 One endpoint.
   * @param val2 The other endpoint.
   * @return A span from the smaller value to the larger value.
   */
  @SuppressWarnings("UnusedDeclaration")
  public static Span fromValues(int val1, int val2) {
    if (val1 <= val2) {
      return new Span(val1, val2);
    } else {
      return new Span(val2, val1);
    }
  }

  /**
   * Constructs a span from loosely typed endpoints. Each endpoint may be a
   * {@link Number} (its {@code intValue()} is used) or a {@link String}
   * (parsed with {@link Integer#parseInt(String)}).
   * <p>
   * A single value {@code v} is treated as the pair {@code (v, v + 1)}.
   *
   * @param values Either one or two endpoint values.
   * @return The span between the two endpoints, ordered as in {@link #fromValues(int, int)}.
   * @throws IllegalArgumentException if the number of values is neither 1 nor 2,
   *         or if a value is neither a Number nor a String
   *         (a {@link NumberFormatException} if a String is not a valid integer).
   */
  public static Span fromValues(Object... values) {
    if (values.length == 1) {
      int only = endpointOf(values[0]);
      return fromValues(only, only + 1);
    }
    if (values.length != 2) { throw new IllegalArgumentException("fromValues() must take an array with 2 elements"); }
    // Both endpoints go through the same conversion, so each one is validated against its own type.
    return fromValues(endpointOf(values[0]), endpointOf(values[1]));
  }

  /** Converts a single loosely typed endpoint (Number or String) into an int offset. */
  private static int endpointOf(Object value) {
    if (value instanceof Number) {
      return ((Number) value).intValue();
    }
    if (value instanceof String) {
      return Integer.parseInt((String) value);
    }
    throw new IllegalArgumentException("Unknown value for span: " + value);
  }

  /** @return The start offset (inclusive). */
  public int start() { return start; }

  /** @return The end offset (exclusive). */
  public int end() { return end; }

  /** Sets the start offset (inclusive). No ordering check against the end is performed. */
  public void setStart(int s) { start = s; }

  /** Sets the end offset (exclusive). No ordering check against the start is performed. */
  public void setEnd(int e) { end = e; }

  /** Two spans are equal when both their start and end offsets match. */
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof Span)) {
      return false;
    }
    Span otherSpan = (Span) other;
    return start == otherSpan.start && end == otherSpan.end;
  }

  /** Hash code derived from the (start, end) pair, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return (new Pair<>(start, end)).hashCode();
  }

  /** Renders the span in half-open interval notation, e.g. {@code [2,5)}. */
  @Override
  public String toString() {
    return "[" + start + "," + end + ")";
  }

  /**
   * Grows this span in place, if necessary, so that it also covers otherSpan.
   *
   * @param otherSpan The span that must be covered after this call.
   */
  public void expandToInclude(Span otherSpan) {
    if (otherSpan.start() < start) {
      setStart(otherSpan.start());
    }
    if (otherSpan.end() > end) {
      setEnd(otherSpan.end());
    }
  }

  /**
   * Returns true if this span contains otherSpan.  Endpoints on spans may match.
   */
  public boolean contains(Span otherSpan) {
    return this.start <= otherSpan.start && otherSpan.end <= this.end;
  }

  /**
   * Returns true if i is inside this span.  Note that the start is inclusive and the end is exclusive.
   */
  public boolean contains(int i) {
    return this.start <= i && i < this.end;
  }

  /**
   * Returns true if this span ends before the otherSpan starts.
   *
   * @throws IllegalArgumentException if either span contains the other span
   */
  public boolean isBefore(Span otherSpan) {
    requireNotNested(otherSpan);
    return this.end <= otherSpan.start;
  }

  /**
   * Returns true if this span starts after the otherSpan's end.
   *
   * @throws IllegalArgumentException if either span contains the other span
   */
  @SuppressWarnings("UnusedDeclaration")
  public boolean isAfter(Span otherSpan) {
    requireNotNested(otherSpan);
    return this.start >= otherSpan.end;
  }

  /** Rejects comparisons between spans where one contains the other; shared by isBefore and isAfter. */
  private void requireNotNested(Span otherSpan) {
    if (this.contains(otherSpan) || otherSpan.contains(this)) {
      throw new IllegalArgumentException("Span " + toString() + " contains otherSpan " + otherSpan + " (or vice versa)");
    }
  }

  /**
   * Move a span by the given amount. Useful for, e.g., switching between 0- and 1-indexed spans.
   * @param diff The difference to ADD to both the beginning and end of the span. So, -1 moves the span left by one.
   * @return A new span, offset by the given difference.
   */
  public Span translate(int diff) {
    return new Span(start + diff, end + diff);
  }

  /**
   * Convert an end-exclusive span to an end-inclusive span.
   *
   * @return A new span whose end is one less than this span's end.
   */
  public Span toInclusive() {
    assert end > start;
    return new Span(start, end - 1);
  }

  /**
   * Convert an end-inclusive span to an end-exclusive span.
   *
   * @return A new span whose end is one more than this span's end.
   */
  public Span toExclusive() {
    return new Span(start, end + 1);
  }

  /**
   * Iterates over the offsets in this span, from start (inclusive) to end (exclusive).
   * The returned iterator does not support {@code remove()}.
   */
  @Override
  public Iterator<Integer> iterator() {
    return new Iterator<Integer>() {
      int nextIndex = start;
      @Override
      public boolean hasNext() {
        return nextIndex < end;
      }
      @Override
      public Integer next() {
        if (!hasNext()) { throw new NoSuchElementException(); }
        nextIndex += 1;
        return nextIndex - 1;
      }
      @Override
      public void remove() { throw new UnsupportedOperationException(); }
    };
  }

  /** @return The number of offsets covered by this span, i.e. {@code end - start}. */
  public int size() {
    return end - start;
  }

  /**
   * Returns true if the two spans overlap: one contains the other, they are equal,
   * or one starts strictly before the other's end while ending after it.
   * Spans that merely touch (one ends where the other starts) do not overlap,
   * unless one of them is contained in the other.
   */
  public static boolean overlaps(Span spanA, Span spanB) {
    return spanA.contains(spanB) ||
            spanB.contains(spanA) ||
            (spanA.end > spanB.end && spanA.start < spanB.end) ||
            (spanB.end > spanA.end && spanB.start < spanA.end) ||
            spanA.equals(spanB);
  }

  /**
   * Returns the number of offsets shared by the two spans, or 0 if they share none.
   * The result is symmetric in its arguments.
   */
  public static int overlap(Span spanA, Span spanB) {
    // The shared region is [max(starts), min(ends)); it is empty when the bounds cross or meet.
    // Comparing before subtracting keeps disjoint spans at 0 even for extreme offsets.
    int sharedStart = Math.max(spanA.start, spanB.start);
    int sharedEnd = Math.min(spanA.end, spanB.end);
    return sharedEnd > sharedStart ? sharedEnd - sharedStart : 0;
  }

  /**
   * Returns true if spanA overlaps (see {@link #overlaps(Span, Span)}) at least one span in the collection.
   */
  public static boolean overlaps(Span spanA, Collection<Span> spanB) {
    for (Span candidate : spanB) {
      if (overlaps(spanA, candidate)) { return true; }
    }
    return false;
  }

  /**
   * Returns the smallest distance between two spans.
   * Overlapping spans (including nested ones) have distance 0.
   *
   * @throws IllegalStateException if the spans neither overlap nor lie one before the other
   */
  public static int distance(Span a, Span b) {
    // overlaps() already covers containment, so isBefore() below cannot throw for nested spans.
    if (Span.overlaps(a, b)) {
      return 0;
    } else if (a.isBefore(b)) {
      return b.start - a.end;
    } else if (b.isBefore(a)) {
      return a.start - b.end;
    } else {
      throw new IllegalStateException("This should be impossible...");
    }
  }

  /**
   * A silly translation between a pair and a span.
   */
  public static Span fromPair(Pair<Integer, Integer> span) {
    return fromValues(span.first, span.second);
  }

  /**
   * Another silly translation between a pair and a span.
   */
  public static Span fromPair(IntPair span) {
    return fromValues(span.getSource(), span.getTarget());
  }

  /**
   * A silly translation between a pair and a span, subtracting one from both elements of the pair.
   */
  public static Span fromPairOneIndexed(Pair<Integer, Integer> span) {
    return fromValues(span.first - 1, span.second - 1);
  }

  /**
   * The union of two spans. That is, the minimal span that contains both.
   */
  public static Span union(Span a, Span b) {
    return Span.fromValues(Math.min(a.start, b.start), Math.max(a.end, b.end));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy