
eu.interedition.text.TextRange Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of text-core Show documentation
Show all versions of text-core Show documentation
Stand-off Markup/Annotation Text Model
The newest version!
package eu.interedition.text;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import java.util.SortedSet;
import javax.annotation.Nullable;
/**
* Addresses a text segment, for example a segment that is annotated by some {@link Layer annotation}.
*
*
*
* Segments are addressed by start and end offsets of the characters forming the boundaries of a segment. The character pointed to by
* the start offset is included in the segment, while the character addressed by the end offset is the first excluded from it.
*
*
*
* Offsets are counted from zero and are located in the gaps between characters:
*
* <pre>
*   a   b   c   d   e
* 0 | 1 | 2 | 3 | 4 | 5
* </pre>
*
* In the given example, the substring "bcd" would be addressed by the segment {@code [1, 4]}, the whole string by the
* segment {@code [0, 5]}. Note that the difference between the offsets equals the length of the segment and that "empty"
* segments pointing in the gaps between characters are valid. So for example to point to the gap between "d" and "e", the
* corresponding empty segment's address would be {@code [4, 4]}.
*
*
*
* Apart from encapsulating the offset values denoting the segment, objects of this class also have methods to apply Gavin
* Nicol's Core Range Algebra. These methods, like {@link #encloses(TextRange)} or {@link #hasOverlapWith(TextRange)}, define
* relationships between text segments, which can be used for example to filter sets of range annotations.
*
* @author Gregor Middell
* @see CharSequence#subSequence(int, int)
*/
public class TextRange implements Comparable<TextRange>, Function<String, String> {

  /**
   * The empty range located at offset zero, i.e. {@code [0, 0]}.
   */
  public static final TextRange NULL = new TextRange(0, 0);

  /**
   * The start offset of the segment (counted from zero, inclusive).
   */
  private final long start;

  /**
   * The end offset of the segment (counted from zero, exclusive).
   */
  private final long end;

  /**
   * Creates a text segment address.
   *
   * @param start start offset
   * @param end   end offset
   * @throws IllegalArgumentException if {@code start} or {@code end} are lower than zero, or if {@code start} is
   *                                  greater than {@code end}
   */
  public TextRange(long start, long end) {
    if (start < 0 || end < 0 || start > end) {
      throw new IllegalArgumentException(toString(start, end));
    }
    this.start = start;
    this.end = end;
  }

  /**
   * Copy constructor.
   *
   * @param b the segment address to be copied
   */
  public TextRange(TextRange b) {
    this(b.start, b.end);
  }

  /**
   * @return the start offset (inclusive)
   */
  public long getStart() {
    return start;
  }

  /**
   * @return the end offset (exclusive)
   */
  public long getEnd() {
    return end;
  }

  /**
   * The length of the addressed segment.
   *
   * @return the length (difference between start and end offset)
   */
  public long length() {
    return end - start;
  }

  /**
   * {@code a.start <= b.start and a.end >= b.end}
   *
   * @param b b range
   * @return {@code true} if this range covers {@code b} completely (equal ranges enclose each other)
   */
  public boolean encloses(TextRange b) {
    return (start <= b.start) && (end >= b.end);
  }

  /**
   * {@code a.start = b.start and a.end > b.end}
   *
   * @param b b range
   * @return {@code true} if both ranges start at the same offset and this range extends beyond the end of {@code b}
   */
  boolean enclosesWithSuffix(TextRange b) {
    return (start == b.start) && (end > b.end);
  }

  /**
   * {@code a.start < b.start and a.end = b.end}
   *
   * @param b b range
   * @return {@code true} if both ranges end at the same offset and this range starts before {@code b}
   */
  boolean enclosesWithPrefix(TextRange b) {
    return (start < b.start) && (end == b.end);
  }

  /**
   * {@code (a <> b) and a.start >= b.start and a.end <= b.end}
   *
   * @param b b range
   * @return {@code true} if this range lies within {@code b} without being equal to it
   */
  boolean fitsWithin(TextRange b) {
    return !equals(b) && (start >= b.start) && (end <= b.end);
  }

  /**
   * {@code overlap(a, b) > 0}
   *
   * @param b b range
   * @return {@code true} if both ranges share at least one character
   */
  public boolean hasOverlapWith(TextRange b) {
    // Same value as overlap(b).length(), computed without allocating an intermediate range.
    return (Math.min(end, b.end) - Math.max(start, b.start)) > 0;
  }

  /**
   * Yields the overlapping segment of this and another segment.
   *
   * @param b another segment
   * @return {@code [max(a.start, b.start), min(a.end, b.end)]}
   * @throws IllegalArgumentException if the segments are disjoint with a gap in between, as the resulting offsets
   *                                  would not form a valid range; use {@link #overlap(TextRange)} to get
   *                                  {@code null} instead
   */
  public TextRange intersectionWith(TextRange b) {
    return new TextRange(Math.max(start, b.start), Math.min(end, b.end));
  }

  /**
   * Yields the overlapping segment of this and another segment, tolerating disjoint segments.
   *
   * @param b b range
   * @return {@code [max(a.start, b.start), min(a.end, b.end)]} (possibly empty, if the segments merely touch), or
   *         {@code null} if the segments are separated by a gap
   */
  public TextRange overlap(TextRange b) {
    final long overlapStart = Math.max(this.start, b.start);
    final long overlapEnd = Math.min(this.end, b.end);
    return (overlapStart <= overlapEnd ? new TextRange(overlapStart, overlapEnd) : null);
  }

  /**
   * {@code b.start >= a.end}
   *
   * @param b b range
   * @return {@code true} if this range ends before or exactly where {@code b} starts
   */
  public boolean precedes(TextRange b) {
    return b.start >= end;
  }

  /**
   * {@code a.start >= (b.end - 1)}
   *
   * @param b b range
   * @return {@code true} if the start of this range is not lower than the offset of the last character of {@code b}
   */
  public boolean follows(TextRange b) {
    // NOTE(review): not symmetric to precedes(); with exclusive end offsets this also holds when this range
    // starts on the last character of b (e.g. [2, 5] "follows" [0, 3]). Kept as documented - confirm intent.
    return (start >= (b.end - 1));
  }

  /**
   * Moves the range by the given number of characters.
   *
   * @param delta the offset difference to add to start and end (may be negative)
   * @return a new range of the same length, shifted by {@code delta}
   * @throws IllegalArgumentException if the shifted offsets do not form a valid range, e.g. become negative
   */
  public TextRange shift(long delta) {
    return new TextRange(start + delta, end + delta);
  }

  /**
   * Orders segments, first by start offset, then by the reverse order of the end offsets.
   *
   * @see Comparable#compareTo(Object)
   */
  @Override
  public int compareTo(TextRange o) {
    // Offsets are never negative, so these subtractions cannot overflow.
    final long result = (start == o.start ? o.end - end : start - o.start);
    return (result < 0 ? -1 : (result > 0 ? 1 : 0));
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(start, end);
  }

  /**
   * Extracts the segment addressed by this range from the given string.
   *
   * @param input the string to extract the segment from
   * @return the addressed substring, or the empty string for an empty range
   * @throws NullPointerException     if {@code input} is {@code null}
   * @throws IllegalArgumentException if this range exceeds the length of {@code input}
   */
  @Override
  public String apply(@Nullable String input) {
    Preconditions.checkNotNull(input, "input");
    final int inputLength = input.length();
    Preconditions.checkArgument(start <= inputLength && end <= inputLength, toString());
    // The narrowing casts are safe: both offsets have just been checked against an int length.
    return (length() == 0 ? "" : input.substring((int) start, (int) end));
  }

  /**
   * Two ranges are equal if their start and end offsets are equal.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof TextRange)) {
      return false;
    }
    final TextRange b = (TextRange) obj;
    return (this.start == b.start) && (this.end == b.end);
  }

  /**
   * Creates a string representation of an offset pair.
   *
   * @param start start offset
   * @param end   end offset
   * @return string representation
   */
  private static String toString(long start, long end) {
    return "[" + start + ", " + end + "]";
  }

  @Override
  public String toString() {
    return toString(start, end);
  }

  /**
   * Removes the part of this range which is covered by another, overlapping range.
   *
   * @param subtrahend the range to subtract; must overlap with this range by at least one character
   * @return the non-empty remainders of this range in ascending order: none if the subtrahend covers this range
   *         completely, one if it cuts off a prefix or suffix, two if it lies strictly within this range
   * @throws IllegalArgumentException if the ranges do not overlap
   */
  public SortedSet<TextRange> substract(TextRange subtrahend) {
    Preconditions.checkArgument(hasOverlapWith(subtrahend));

    final SortedSet<TextRange> remainders = Sets.newTreeSet();
    // The overlap precondition guarantees subtrahend.start < end and start < subtrahend.end,
    // so each remainder constructed below is a valid, non-empty range.
    if (start < subtrahend.start) {
      // part of this range left of the subtrahend
      remainders.add(new TextRange(start, subtrahend.start));
    }
    if (end > subtrahend.end) {
      // part of this range right of the subtrahend
      remainders.add(new TextRange(subtrahend.end, end));
    }
    return remainders;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy