All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.ie.machinereading.structure.Span Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ie.machinereading.structure;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;

import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;

/**
 * Stores the offsets for a span of text
 * Offsets may indicate either token or byte positions
 * Start is inclusive, end is exclusive
 * @author Mihai 
 */
public class Span implements Serializable, Iterable {
  private static final long serialVersionUID = -3861451490217976693L;

  private int start;
  private int end;

  /** For Kryo serializer */
  @SuppressWarnings("UnusedDeclaration")
  private Span() { }

  /**
   * This assumes that s <= e.  Use fromValues if you can't guarantee this.
   */
  public Span(int s, int e) {
    start = s;
    end = e;
  }
  
  /**
   * Creates a span that encloses all spans in the argument list.  Behavior is undefined if given no arguments.
   */
  public Span(Span... spans) {
    this(Integer.MAX_VALUE, Integer.MIN_VALUE);

    for (Span span : spans) {
      expandToInclude(span);
    }
  }
  
  /**
   * Safe way to construct Spans if you're not sure which value is higher.
   */
  @SuppressWarnings("UnusedDeclaration")
  public static Span fromValues(int val1, int val2) {
    if (val1 <= val2) {
      return new Span(val1, val2);
    } else {
      return new Span(val2, val1);
    }
  }

  public static Span fromValues(Object... values) {
    if (values.length == 1) {
      return fromValues(values[0], values[0] instanceof Number ? ((Number) values[0]).intValue() + 1 : Integer.parseInt(values[0].toString()) + 1);
    }
    if (values.length != 2) { throw new IllegalArgumentException("fromValues() must take an array with 2 elements"); }
    int val1;
    if (values[0] instanceof Number) { val1 = ((Number) values[0]).intValue(); }
    else if (values[0] instanceof String) { val1 = Integer.parseInt((String) values[0]); }
    else { throw new IllegalArgumentException("Unknown value for span: " + values[0]); }
    int val2;
    if (values[1] instanceof Number) { val2 = ((Number) values[1]).intValue(); }
    else if (values[0] instanceof String) { val2 = Integer.parseInt((String) values[1]); }
    else { throw new IllegalArgumentException("Unknown value for span: " + values[1]); }
    return fromValues(val1, val2);
  }

  public int start() { return start; }
  public int end() { return end; }
  
  public void setStart(int s) { start = s; }
  public void setEnd(int e) { end = e; }
  
  @Override
  public boolean equals(Object other) {
    if(! (other instanceof Span)) return false;
    Span otherSpan = (Span) other;
    return start == otherSpan.start && end == otherSpan.end;
  }
  
  @Override
  public int hashCode() {
    return (new Pair<>(start, end)).hashCode();
  }
  
  @Override
  public String toString() {
    return "[" + start + "," + end + ")";
  }
  
  public void expandToInclude(Span otherSpan) {
    if (otherSpan.start() < start) {
      setStart(otherSpan.start());
    }
    if (otherSpan.end() > end) {
      setEnd(otherSpan.end());
    }
  }

  /**
   * Returns true if this span contains otherSpan.  Endpoints on spans may match.
   */
  public boolean contains(Span otherSpan) {
    return this.start <= otherSpan.start && otherSpan.end <= this.end;
  }
  
  /**
   * Returns true if i is inside this span.  Note that the start is inclusive and the end is exclusive.
   */
  public boolean contains(int i) {
    return this.start <= i && i < this.end;
  }

  /**
   * Returns true if this span ends before the otherSpan starts.
   * 
   * @throws IllegalArgumentException if either span contains the other span
   */
  public boolean isBefore(Span otherSpan) {
    if (this.contains(otherSpan) || otherSpan.contains(this)) {
      throw new IllegalArgumentException("Span " + toString() + " contains otherSpan " + otherSpan + " (or vice versa)");
    }
    return this.end <= otherSpan.start;
  }

  /**
   * Returns true if this span starts after the otherSpan's end.
   * 
   * @throws IllegalArgumentException if either span contains the other span
   */
  @SuppressWarnings("UnusedDeclaration")
  public boolean isAfter(Span otherSpan) {
    if (this.contains(otherSpan) || otherSpan.contains(this)) {
      throw new IllegalArgumentException("Span " + toString() + " contains otherSpan " + otherSpan + " (or vice versa)");
    }
    return this.start >= otherSpan.end;
  }

  /**
   * Move a span by the given amount. Useful for, e.g., switching between 0- and 1-indexed spans.
   * @param diff The difference to ADD to both the beginning and end of the span. So, -1 moves the span left by one.
   * @return A new span, offset by the given difference.
   */
  public Span translate(int diff) {
    return new Span(start + diff, end + diff);
  }

  /**
   * Convert an end-exclusive span to an end-inclusive span.
   */
  public Span toInclusive() {
    assert end > start;
    return new Span(start, end - 1);
  }

  /**
   * Convert an end-inclusive span to an end-exclusive span.
   */
  public Span toExclusive() {
    return new Span(start, end + 1);
  }

  @Override
  public Iterator iterator() {
    return new Iterator() {
      int nextIndex = start;
      @Override
      public boolean hasNext() {
        return nextIndex < end;
      }
      @Override
      public Integer next() {
        if (!hasNext()) { throw new NoSuchElementException(); }
        nextIndex += 1;
        return nextIndex - 1;
      }
      @Override
      public void remove() { throw new UnsupportedOperationException(); }
    };
  }

  public int size() {
    return end - start;
  }

  public static boolean overlaps(Span spanA, Span spanB) {
    return spanA.contains(spanB) ||
            spanB.contains(spanA) ||
            (spanA.end > spanB.end && spanA.start < spanB.end) ||
            (spanB.end > spanA.end && spanB.start < spanA.end) ||
            spanA.equals(spanB);
  }

  public static int overlap(Span spanA, Span spanB) {
    if (spanA.contains(spanB)) {
      return Math.min(spanA.end - spanA.start, spanB.end - spanB.start);
    } else if (spanA.equals(spanB)) {
      return spanA.end - spanA.start;
    } else if ( (spanA.end > spanB.end && spanA.start < spanB.end) ||
                (spanB.end > spanA.end && spanB.start < spanA.end) ) {
      return Math.min(spanA.end, spanB.end) - Math.max(spanA.start, spanB.start) ;
    } else {
      return 0;
    }
  }

  public static boolean overlaps(Span spanA, Collection spanB) {
    for (Span candidate : spanB) {
      if (overlaps(spanA, candidate)) { return true; }
    }
    return false;
  }

  /**
   * Returns the smallest distance between two spans.
   */
  public static int distance(Span a, Span b) {
    if (Span.overlaps(a, b)) {
      return 0;
    } else if (a.contains(b) || b.contains(a)) {
      return 0;
    } else if (a.isBefore(b)) {
      return b.start - a.end;
    } else if (b.isBefore(a)) {
      return a.start - b.end;
    } else {
      throw new IllegalStateException("This should be impossible...");
    }
  }

  /**
   * A silly translation between a pair and a span.
   */
  public static Span fromPair(Pair span) {
    return fromValues(span.first, span.second);
  }

  /**
   * Another silly translation between a pair and a span.
   */
  public static Span fromPair(IntPair span) {
    return fromValues(span.getSource(), span.getTarget());
  }

  /**
   * A silly translation between a pair and a span.
   */
  public static Span fromPairOneIndexed(Pair span) {
    return fromValues(span.first - 1, span.second - 1);
  }

  /**
   * The union of two spans. That is, the minimal span that contains both.
   */
  public static Span union(Span a, Span b) {
    return Span.fromValues(Math.min(a.start, b.start), Math.max(a.end, b.end));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy