All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.interedition.text.TextRange Maven / Gradle / Ivy

The newest version!
package eu.interedition.text;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import java.util.SortedSet;
import javax.annotation.Nullable;

/**
 * Adresses a text segment, for example a segment, that is annotated by some {@link Layer annotation}.
 * 

*

*

* Segments are adressed by start and end offsets of the characters forming the boundaries of a segment. The character pointed to by * the start offset is included in the segment, while the character addressed by the end offset is the first excluded from it. *

*

*

* Offsets are counted from zero and are located in the gaps between characters: *

*

 *   a   b   c   d   e
 * 0 | 1 | 2 | 3 | 4 | 5
 * 
*

* In the given example, the substring "bcd" would be adressed by the segment [1, 4], the whole string by the segment * [0, 5]. Note that the difference between the offsets equals the length of the segment and that "empty" segments * pointing in the gaps between characters are valid. So for example to point to the gap between "d" and "e", the corresponding * empty segment's address would be [4, 4]. *

*

*

* Apart from encapsulating the offset values denoting the segment, objects of this class also have methods to apply Gavin * Nicols' Core Range Algebra. These methods like {@link #encloses(TextRange)} or {@link #hasOverlapWith(TextRange)} define * relationships between text segments, which can be used for example to filter sets of range annotations. * * @author Gregor Middell * @see CharSequence#subSequence(int, int) */ public class TextRange implements Comparable, Function { public static final TextRange NULL = new TextRange(0, 0); /** * The start offset of the segment (counted from zero, inclusive). */ private final long start; /** * The end offset of the segment (counted from zero, exclusive). */ private final long end; /** * Creates a text segment address. * * @param start start offset * @param end end offset * @throws IllegalArgumentException if start or end or lower than zero, or if start is greather than * end */ public TextRange(long start, long end) { if (start < 0 || end < 0 || start > end) { throw new IllegalArgumentException(toString(start, end)); } this.start = start; this.end = end; } /** * Copy constructor. * * @param b the segment address to be copied */ public TextRange(TextRange b) { this(b.start, b.end); } public long getStart() { return start; } public long getEnd() { return end; } /** * The length of the adressed segment. * * @return the length (difference between start and end offset) */ public long length() { return end - start; } /** * a.start <= b.start and a.end >= b.end * * @param b b range * @return true/false */ public boolean encloses(TextRange b) { return (start <= b.start) && (end >= b.end); } /** * a.start = b.start and a.end > b.end * * @param b b range * @return true/false */ boolean enclosesWithSuffix(TextRange b) { return (start == b.start) && (end > b.end); } /** * a.start < b.start and a.end = b.end * * @param b b range * @return true/false */ boolean enclosesWithPrefix(TextRange b) { return (start < b.start) && (end == b.end); } /** * (a <> b) and a.start > b.start and a.end <= b.end * * @param b b range * @return true/false */ boolean fitsWithin(TextRange b) { return !equals(b) && (start >= b.start) && (end <= b.end); } /** * overlap(a, b) > 0 * * @param b b range * @return true/false */ public boolean hasOverlapWith(TextRange b) { final TextRange overlap = overlap(b); return (overlap != null) && (overlap.length() > 0); } /** * Yields the overlapping segment of this and another segment. * * @param b another segment * @return [max(a.start, b.start), min(a.end, b.end)] */ public TextRange intersectionWith(TextRange b) { return new TextRange(Math.max(start, b.start), Math.min(end, b.end)); } /** * min(a.end, b.end) - max(a.start, b.start) * * @param b b range * @return length of overlap */ public TextRange overlap(TextRange b) { final long start = Math.max(this.start, b.start); final long end = Math.min(this.end, b.end); return ((end - start) >= 0 ? new TextRange(start, end) : null); } /** * b.start >= a.end * * @param b b range * @return true/false */ public boolean precedes(TextRange b) { return b.start >= end; } /** * a.start >= (b.end - 1) * * @param b b range * @return true/false */ public boolean follows(TextRange b) { return (start >= (b.end - 1)); } public TextRange shift(long delta) { return new TextRange(start + delta, end + delta); } /** * Orders segments, first by start offset, then by the reverse order of the end offsets. * * @see Comparable#compareTo(Object) */ public int compareTo(TextRange o) { final long result = (start == o.start ? o.end - end : start - o.start); return (result < 0 ? -1 : (result > 0 ? 1 : 0)); } @Override public int hashCode() { return Objects.hashCode(start, end); } @Override public String apply(@Nullable String input) { final int inputLength = input.length(); Preconditions.checkArgument(start <= inputLength && end <= inputLength, toString()); return (length() == 0 ? "" : input.substring((int) start, (int) end)); } @Override public boolean equals(Object obj) { if (obj == null || !(obj instanceof TextRange)) { return super.equals(obj); } TextRange b = (TextRange) obj; return (this.start == b.start) && (this.end == b.end); } /** * Creates a string representation of an offset pair. * * @param start start offset * @param end end offset * @return string representation */ private static String toString(long start, long end) { return "[" + start + ", " + end + "]"; } @Override public String toString() { return toString(start, end); } public SortedSet substract(TextRange subtrahend) { Preconditions.checkArgument(hasOverlapWith(subtrahend)); final SortedSet remainders = Sets.newTreeSet(); if (fitsWithin(subtrahend)) { return remainders; } if (enclosesWithPrefix(subtrahend)) { remainders.add(new TextRange(subtrahend.start, end)); } else if (enclosesWithSuffix(subtrahend)) { remainders.add(new TextRange(start, subtrahend.end)); } else { remainders.add(new TextRange(start, subtrahend.start)); remainders.add(new TextRange(subtrahend.end, end)); } return remainders; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy