
// org.broadinstitute.hellbender.utils.SVIntervalTree (Maven / Gradle / Ivy)
package org.broadinstitute.hellbender.utils;
import com.esotericsoftware.kryo.DefaultSerializer;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import org.broadinstitute.hellbender.exceptions.GATKException;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
/**
* A Red-Black tree with intervals for keys.
* Intervals are kept in sorted order first by start value and then by end value. This cannot be overridden.
* Not thread-safe, and cannot be made so. You must synchronize externally.
*
* There's some weird stuff about sentinel values for the put and remove methods. Here's what's up with that:
* When you update the value associated with some interval by doing a put with an interval that's already in the
* tree, the old value is returned to you. But maybe you've put some nulls into the tree as values. (That's legal.)
* In that case, when you get a null value returned by put you can't tell whether the interval was inserted into the tree
* and there was no old value to return to you or whether you just updated an existing interval that had a null value
* associated with it. (Both situations return null.) IF you're inserting nulls as values, and IF you need to be able
* to tell whether the put operation did an insert or an update, you can do a special thing so that you can distinguish
* these cases: set the sentinel value for the tree to some singleton object that you never ever use as a legitimate
* value. Then when you call put you'll get your sentinel value back for an insert, but you'll get null back for an
* update of a formerly-null value. Same thing happens for remove: set the sentinel IF you've used nulls for values,
* and IF you need to be able to tell the difference between remove not finding the interval and remove removing an
* interval that has a null value associated with it.
* If you're not using nulls as values, or if you don't care to disambiguate these cases, then just forget about
* all this weirdness. The sentinel value is null by default, so put and remove will behave like you might expect them
* to if you're not worrying about this stuff: they'll return null for novel insertions and failed deletions.
*/
@DefaultSerializer(SVIntervalTree.Serializer.class)
public final class SVIntervalTree implements Iterable> {
private Node root;
private V sentinel;
public SVIntervalTree() {}
@SuppressWarnings("unchecked")
private SVIntervalTree( final Kryo kryo, final Input input ) {
final SVInterval.Serializer intervalSerializer = new SVInterval.Serializer();
int size = input.readInt();
while ( size-- > 0 ) {
final SVInterval interval = intervalSerializer.read(kryo, input, SVInterval.class);
final V value = (V)kryo.readClassAndObject(input);
put(interval, value);
}
}
private void serialize( final Kryo kryo, final Output output ) {
final SVInterval.Serializer intervalSerializer = new SVInterval.Serializer();
int nEntries = size();
output.writeInt(nEntries);
for ( final Entry entry : this ) {
intervalSerializer.write(kryo, output, entry.getInterval());
kryo.writeClassAndObject(output, entry.getValue());
nEntries -= 1;
}
if ( nEntries != 0 ) {
throw new GATKException("SVIntervalTree size and iteration gave a different number of intervals.");
}
}
/**
* Return the number of intervals in the tree.
*
* @return The number of intervals.
*/
public int size() {
return root == null ? 0 : root.getSize();
}
/**
* Remove all entries.
*/
public void clear() {
root = null;
}
/**
* Put a new interval into the tree (or update the value associated with an existing interval).
* If the interval is novel, the special sentinel value (which is null by default) is returned.
*
* @param interval The interval.
* @param value The associated value.
* @return The old value associated with that interval, or the sentinel value.
*/
public V put( final SVInterval interval, final V value ) {
V result = sentinel;
if ( root == null ) {
root = new Node<>(interval, value);
} else {
Node parent = null;
Node node = root;
int cmpVal = 0;
while ( node != null ) {
parent = node; // last non-null node
cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal == 0 ) {
break;
}
node = cmpVal < 0 ? node.getLeft() : node.getRight();
}
if ( cmpVal == 0 ) {
result = parent.setValue(value);
} else if ( cmpVal < 0 ) {
root = parent.insertLeft(interval, value, root);
} else {
root = parent.insertRight(interval, value, root);
}
}
return result;
}
/**
* Remove an interval from the tree.
* If the interval is not found, the special sentinel value (which is null by default) is returned.
*
* @param interval The interval to remove.
* @return The value associated with the deleted interval, or the sentinel value.
*/
public V remove( final SVInterval interval ) {
V result = sentinel;
Node node = root;
while ( node != null ) {
final int cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal == 0 ) {
result = node.getValue();
root = node.remove(root);
break;
}
node = cmpVal < 0 ? node.getLeft() : node.getRight();
}
return result;
}
/**
* Find an interval.
*
* @param interval The interval sought.
* @return The Node that represents that interval, or null.
*/
public Entry find( final SVInterval interval ) {
Node node = root;
while ( node != null ) {
final int cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal == 0 ) {
break;
}
node = cmpVal < 0 ? node.getLeft() : node.getRight();
}
return node;
}
/**
* Find the nth interval in the tree.
*
* @param idx The rank of the interval sought (from 0 to size()-1).
* @return The Node that represents the nth interval.
*/
public Entry findByIndex( final int idx ) {
return Node.findByRank(root, idx + 1);
}
/**
* Find the rank of the specified interval. If the specified interval is not in the
* tree, then -1 is returned.
*
* @param interval The interval for which the index is sought.
* @return The rank of that interval, or -1.
*/
public int getIndex( final SVInterval interval ) { return Node.getRank(root, interval) - 1; }
/**
* Find the least interval in the tree.
*
* @return The earliest interval, or null if the tree is empty.
*/
public Entry min() {
Node result = null;
Node node = root;
while ( node != null ) {
result = node;
node = node.getLeft();
}
return result;
}
/**
* Find the earliest interval in the tree greater than or equal to the specified interval.
*
* @param interval The interval sought.
* @return The earliest >= interval, or null if there is none.
*/
public Entry min( final SVInterval interval ) {
Node result = null;
Node node = root;
int cmpVal = 0;
while ( node != null ) {
result = node;
cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal == 0 ) {
break;
}
node = cmpVal < 0 ? node.getLeft() : node.getRight();
}
if ( cmpVal > 0 ) {
result = result.getNext();
}
return result;
}
/**
* Check the tree against a probe interval to see if there's an overlapping interval.
*
* @param interval The interval sought.
* @return Whether or not there's an overlapping interval in this tree.
*/
public boolean hasOverlapper( final SVInterval interval ) {
Node node = root;
if ( node != null && !node.getMaxEndInterval().isUpstreamOf(interval) ) {
while ( true ) {
if ( node.getInterval().overlaps(interval) ) {
return true;
} else { // no overlap. if there might be a left sub-tree overlapper, consider the left sub-tree.
final Node left = node.getLeft();
if ( left != null && !left.getMaxEndInterval().isUpstreamOf(interval) ) {
node = left;
} else { // left sub-tree cannot contain an overlapper. consider the right sub-tree.
// if everything in the right sub-tree is past the end of the query interval, then break
if ( interval.isUpstreamOf(node.getInterval()) ) {
break;
}
node = node.getRight();
// if no right sub-tree or all nodes end too early, then break
if ( node == null || node.getMaxEndInterval().isUpstreamOf(interval) ) {
break;
}
}
}
}
}
return false;
}
/**
* Find the earliest interval in the tree that overlaps the specified interval.
*
* @param interval The interval sought.
* @return The earliest overlapping interval, or null if there is none.
*/
public Entry minOverlapper( final SVInterval interval ) {
Node result = null;
Node node = root;
if ( node != null && !node.getMaxEndInterval().isUpstreamOf(interval) ) {
while ( true ) {
if ( node.getInterval().overlaps(interval) ) {
// this node overlaps. however, there might be an earlier overlapper down the left sub-tree.
// no need to consider the right sub-tree: even if there's an overlapper, if won't be minimal
result = node;
node = node.getLeft();
// if no left sub-tree or all nodes end too early, then break
if ( node == null || node.getMaxEndInterval().isUpstreamOf(interval) ) {
break;
}
} else { // no overlap. if there might be a left sub-tree overlapper, consider the left sub-tree.
final Node left = node.getLeft();
if ( left != null && !left.getMaxEndInterval().isUpstreamOf(interval) ) {
node = left;
} else { // left sub-tree cannot contain an overlapper. consider the right sub-tree.
// if everything in the right sub-tree is past the end of the query interval, then break
if ( interval.isUpstreamOf(node.getInterval()) ) {
break;
}
node = node.getRight();
// if no right sub-tree or all nodes end too early, then break
if ( node == null || node.getMaxEndInterval().isUpstreamOf(interval) ) {
break;
}
}
}
}
}
return result;
}
/**
* Find the greatest interval in the tree.
*
* @return The latest interval, or null if the tree is empty.
*/
public Entry max() {
Node result = null;
Node node = root;
while ( node != null ) {
result = node;
node = node.getRight();
}
return result;
}
/**
* Find the latest interval in the tree less than or equal to the specified interval.
*
* @param interval The interval sought.
* @return The latest <= interval, or null if there is none.
*/
public Entry max( final SVInterval interval ) {
Node result = null;
Node node = root;
int cmpVal = 0;
while ( node != null ) {
result = node;
cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal == 0 ) {
break;
}
node = cmpVal < 0 ? node.getLeft() : node.getRight();
}
if ( cmpVal < 0 ) {
result = result.getPrev();
}
return result;
}
/**
* Return the interval having the largest ending value.
* This will be null if the tree is empty.
*/
public SVInterval maxEnd() {
return root == null ? null : root.getMaxEndInterval();
}
/**
* Return an iterator over the entire tree.
*
* @return An iterator.
*/
public Iterator> iterator() { return new FwdIterator((Node)min()); }
/**
* Return an iterator over all intervals greater than or equal to the specified interval.
*
* @param interval The minimum interval.
* @return An iterator.
*/
public Iterator> iterator( final SVInterval interval ) { return new FwdIterator((Node)min(interval)); }
/**
* Return an iterator over all intervals overlapping the specified interval.
*
* @param interval Interval to overlap.
* @return An iterator.
*/
public Iterator> overlappers( final SVInterval interval ) { return new OverlapIterator(interval); }
/**
* Return an iterator over the entire tree that returns intervals in reverse order.
*
* @return An iterator.
*/
public Iterator> reverseIterator() { return new RevIterator((Node)max()); }
/**
* Return an iterator over all intervals less than or equal to the specified interval, in reverse order.
*
* @param interval The maximum interval.
* @return An iterator.
*/
public Iterator> reverseIterator( final SVInterval interval ) {
return new RevIterator((Node)max(interval));
}
/**
* Get the special sentinel value that will be used to signal novelty when putting a new interval
* into the tree, or to signal "not found" when removing an interval. This is null by default.
*
* @return The sentinel value.
*/
public V getSentinel() {
return sentinel;
}
/**
* Set the special sentinel value that will be used to signal novelty when putting a new interval
* into the tree, or to signal "not found" when removing an interval.
*
* @param sentinel The new sentinel value.
* @return The old sentinel value.
*/
public V setSentinel( final V sentinel ) {
final V result = this.sentinel;
this.sentinel = sentinel;
return result;
}
/** fraction of the intervals in this tree that overlap with intervals in some other tree */
public float overlapFraction( final SVIntervalTree> that ) {
int count = 0;
for ( final Entry entry : this ) {
if ( that.hasOverlapper(entry.getInterval()) ) count += 1;
}
return (float)count/size();
}
void removeNode( final Node node ) {
root = node.remove(root);
}
public interface Entry {
SVInterval getInterval();
V1 getValue();
V1 setValue( final V1 value );
}
static final class Node implements Entry {
private final SVInterval interval;
private V1 value;
private Node parent;
private Node left;
private Node right;
private int size;
private SVInterval maxEndInterval; // interval in this sub-tree having the greatest endpoint
private boolean isBlack;
/** make a root node */
Node( final SVInterval interval, final V1 value ) {
this.interval = interval;
this.value = value;
size = 1;
maxEndInterval = interval;
isBlack = true;
}
/** make a leaf node */
Node( final Node parent, final SVInterval interval, final V1 value ) {
this.interval = interval;
this.value = value;
this.parent = parent;
size = 1;
maxEndInterval = interval;
}
@Override
public SVInterval getInterval() { return interval; }
@Override
public V1 getValue() {
return value;
}
@Override
public V1 setValue( final V1 value ) {
final V1 result = this.value;
this.value = value;
return result;
}
int getSize() {
return size;
}
SVInterval getMaxEndInterval() {
return maxEndInterval;
}
Node getLeft() {
return left;
}
Node insertLeft( final SVInterval interval, final V1 value, final Node root ) {
left = new Node<>(this, interval, value);
return insertFixup(left, root);
}
Node getRight() {
return right;
}
Node insertRight( final SVInterval interval, final V1 value, final Node root ) {
right = new Node<>(this, interval, value);
return insertFixup(right, root);
}
Node getNext() {
Node result;
if ( right != null ) {
result = right;
while ( result.left != null ) {
result = result.left;
}
} else {
Node node = this;
result = parent;
while ( result != null && node == result.right ) {
node = result;
result = result.parent;
}
}
return result;
}
Node getPrev() {
Node result;
if ( left != null ) {
result = left;
while ( result.right != null ) {
result = result.right;
}
} else {
Node node = this;
result = parent;
while ( result != null && node == result.left ) {
node = result;
result = result.parent;
}
}
return result;
}
boolean wasRemoved() {
return size == 0;
}
Node remove( final Node initialRoot ) {
Node root = initialRoot;
if ( size == 0 ) {
throw new IllegalStateException("Entry was already removed.");
}
if ( left == null ) {
if ( right == null ) { // no children
if ( parent == null ) {
root = null;
} else if ( parent.left == this ) {
parent.left = null;
fixup(parent);
if ( isBlack ) {
root = removeFixup(parent, null, root);
}
} else {
parent.right = null;
fixup(parent);
if ( isBlack ) {
root = removeFixup(parent, null, root);
}
}
} else { // single child on right
root = spliceOut(right, root);
}
} else if ( right == null ) { // single child on left
root = spliceOut(left, root);
} else { // two children
final Node next = getNext();
root = next.remove(root);
// put next into tree in same position as this, effectively removing this
if ( (next.parent = parent) == null ) {
root = next;
} else if ( parent.left == this ) {
parent.left = next;
} else {
parent.right = next;
}
if ( (next.left = left) != null ) {
left.parent = next;
}
if ( (next.right = right) != null ) {
right.parent = next;
}
next.isBlack = isBlack;
next.size = size;
fixup(next);
}
size = 0;
return root;
}
static Node getNextOverlapper( final Node startingNode, final SVInterval interval ) {
Node node = startingNode;
do {
Node nextNode = node.right;
if ( nextNode != null && !nextNode.maxEndInterval.isUpstreamOf(interval) ) {
node = nextNode;
while ( (nextNode = node.left) != null && !nextNode.maxEndInterval.isUpstreamOf(interval) )
node = nextNode;
} else {
nextNode = node;
while ( (node = nextNode.parent) != null && node.right == nextNode )
nextNode = node;
}
if ( node != null && interval.isUpstreamOf(node.interval) ) {
node = null;
}
}
while ( node != null && interval.isDisjointFrom(node.interval) );
return node;
}
static Node findByRank( final Node startingNode, final int initialRank ) {
Node node = startingNode;
int rank = initialRank;
while ( node != null ) {
final int nodeRank = node.getRank();
if ( rank == nodeRank ) {
break;
}
if ( rank < nodeRank ) {
node = node.left;
} else {
node = node.right;
rank -= nodeRank;
}
}
return node;
}
static int getRank( final Node startingNode, final SVInterval interval ) {
Node node = startingNode;
int rank = 0;
while ( node != null ) {
final int cmpVal = interval.compareTo(node.getInterval());
if ( cmpVal < 0 ) {
node = node.left;
} else {
rank += node.getRank();
if ( cmpVal == 0 ) {
return rank; // EARLY RETURN!!!
}
node = node.right;
}
}
return 0;
}
private int getRank() {
int result = 1;
if ( left != null ) {
result = left.size + 1;
}
return result;
}
private Node spliceOut( final Node child, final Node initialRoot ) {
Node root = initialRoot;
if ( (child.parent = parent) == null ) {
root = child;
child.isBlack = true;
} else {
if ( parent.left == this ) {
parent.left = child;
} else {
parent.right = child;
}
fixup(parent);
if ( isBlack ) {
root = removeFixup(parent, child, root);
}
}
return root;
}
private Node rotateLeft( final Node initialRoot ) {
Node root = initialRoot;
final Node child = right;
final int childSize = child.size;
child.size = size;
size -= childSize;
if ( (right = child.left) != null ) {
right.parent = this;
size += right.size;
}
if ( (child.parent = parent) == null ) {
root = child;
} else if ( this == parent.left ) {
parent.left = child;
} else {
parent.right = child;
}
child.left = this;
parent = child;
setMaxEnd();
child.setMaxEnd();
return root;
}
private Node rotateRight( final Node initialRoot ) {
Node root = initialRoot;
final Node child = left;
final int childSize = child.size;
child.size = size;
size -= childSize;
if ( (left = child.right) != null ) {
left.parent = this;
size += left.size;
}
if ( (child.parent = parent) == null ) {
root = child;
} else if ( this == parent.left ) {
parent.left = child;
} else {
parent.right = child;
}
child.right = this;
parent = child;
setMaxEnd();
child.setMaxEnd();
return root;
}
private void setMaxEnd() {
maxEndInterval = interval;
if ( left != null ) {
maxEndInterval = laterEndingInterval(maxEndInterval, left.maxEndInterval);
}
if ( right != null ) {
maxEndInterval = laterEndingInterval(maxEndInterval, right.maxEndInterval);
}
}
private static SVInterval laterEndingInterval( final SVInterval interval1, final SVInterval interval2 ) {
final int contig1 = interval1.getContig();
final int contig2 = interval2.getContig();
if ( contig1 > contig2 ) {
return interval1;
}
if ( contig2 > contig1 ) {
return interval2;
}
if ( interval2.getEnd() > interval1.getEnd() ) {
return interval2;
}
return interval1;
}
private static void fixup( final Node initialNode ) {
Node node = initialNode;
do {
node.size = 1;
if ( node.left != null ) {
node.size += node.left.size;
}
if ( node.right != null ) {
node.size += node.right.size;
}
node.setMaxEnd();
}
while ( (node = node.parent) != null );
}
private static Node insertFixup( final Node initialDaughter, final Node initialRoot ) {
Node daughter = initialDaughter;
Node root = initialRoot;
Node mom = daughter.parent;
fixup(mom);
while ( mom != null && !mom.isBlack ) {
final Node gramma = mom.parent;
Node auntie = gramma.left;
if ( auntie == mom ) {
auntie = gramma.right;
if ( auntie != null && !auntie.isBlack ) {
mom.isBlack = true;
auntie.isBlack = true;
gramma.isBlack = false;
daughter = gramma;
} else {
if ( daughter == mom.right ) {
root = mom.rotateLeft(root);
mom = daughter;
}
mom.isBlack = true;
gramma.isBlack = false;
root = gramma.rotateRight(root);
break;
}
} else {
if ( auntie != null && !auntie.isBlack ) {
mom.isBlack = true;
auntie.isBlack = true;
gramma.isBlack = false;
daughter = gramma;
} else {
if ( daughter == mom.left ) {
root = mom.rotateRight(root);
mom = daughter;
}
mom.isBlack = true;
gramma.isBlack = false;
root = gramma.rotateLeft(root);
break;
}
}
mom = daughter.parent;
}
root.isBlack = true;
return root;
}
private static Node removeFixup( final Node initialParent,
final Node initialNode, final Node initialRoot ) {
Node parent = initialParent;
Node node = initialNode;
Node root = initialRoot;
do {
if ( node == parent.left ) {
Node sister = parent.right;
if ( !sister.isBlack ) {
sister.isBlack = true;
parent.isBlack = false;
root = parent.rotateLeft(root);
sister = parent.right;
}
if ( (sister.left == null || sister.left.isBlack) && (sister.right == null || sister.right.isBlack) ) {
sister.isBlack = false;
node = parent;
} else {
if ( sister.right == null || sister.right.isBlack ) {
sister.left.isBlack = true;
sister.isBlack = false;
root = sister.rotateRight(root);
sister = parent.right;
}
sister.isBlack = parent.isBlack;
parent.isBlack = true;
sister.right.isBlack = true;
root = parent.rotateLeft(root);
node = root;
}
} else {
Node sister = parent.left;
if ( !sister.isBlack ) {
sister.isBlack = true;
parent.isBlack = false;
root = parent.rotateRight(root);
sister = parent.left;
}
if ( (sister.left == null || sister.left.isBlack) && (sister.right == null || sister.right.isBlack) ) {
sister.isBlack = false;
node = parent;
} else {
if ( sister.left == null || sister.left.isBlack ) {
sister.right.isBlack = true;
sister.isBlack = false;
root = sister.rotateLeft(root);
sister = parent.left;
}
sister.isBlack = parent.isBlack;
parent.isBlack = true;
sister.left.isBlack = true;
root = parent.rotateRight(root);
node = root;
}
}
parent = node.parent;
}
while ( parent != null && node.isBlack );
node.isBlack = true;
return root;
}
}
abstract class IteratorBase implements Iterator> {
protected Node next;
protected Node last;
protected IteratorBase( final Node node ) { next = node; }
@Override
public boolean hasNext() {
return next != null;
}
@Override
public void remove() {
if ( last == null ) {
throw new IllegalStateException("No entry to remove.");
}
removeNode(last);
last = null;
}
/** equality of iterators is defined as having the same current position in the tree */
@Override
public boolean equals( final Object obj ) {
return obj instanceof SVIntervalTree>.IteratorBase && ((SVIntervalTree>.IteratorBase)obj).next == next;
}
@Override
public int hashCode() {
return Objects.hashCode(next);
}
}
public final class FwdIterator extends IteratorBase {
public FwdIterator( final Node node ) {
super(node);
}
@Override
public Node next() {
if ( next == null ) {
throw new NoSuchElementException("No next element.");
}
if ( next.wasRemoved() ) {
next = (Node)min(next.getInterval());
if ( next == null ) {
throw new ConcurrentModificationException("Current element was removed, and there are no more elements.");
}
}
last = next;
next = next.getNext();
return last;
}
}
public final class RevIterator extends IteratorBase {
public RevIterator( final Node node ) {
super(node);
}
@Override
public Node next() {
if ( next == null ) {
throw new NoSuchElementException("No next element.");
}
if ( next.wasRemoved() ) {
next = (Node)max(next.getInterval());
if ( next == null ) {
throw new ConcurrentModificationException("Current element was removed, and there are no more elements.");
}
}
last = next;
next = next.getPrev();
return last;
}
}
public final class OverlapIterator extends IteratorBase {
private final SVInterval interval;
public OverlapIterator( final SVInterval interval ) {
super((Node)minOverlapper(interval));
this.interval = interval;
}
@Override
public Node next() {
if ( next == null ) {
throw new NoSuchElementException("No next element.");
}
if ( next.wasRemoved() ) {
throw new ConcurrentModificationException("Current element was removed.");
}
last = next;
next = Node.getNextOverlapper(next, interval);
return last;
}
}
public final static class ValuesIterator implements Iterator {
private final Iterator> itr;
public ValuesIterator( final Iterator> itr ) {
this.itr = itr;
}
@Override
public boolean hasNext() {
return itr.hasNext();
}
@Override
public V1 next() {
return itr.next().getValue();
}
@Override
public void remove() {
itr.remove();
}
}
public static final class Serializer extends com.esotericsoftware.kryo.Serializer> {
@Override
public void write( final Kryo kryo, final Output output, final SVIntervalTree interval ) {
interval.serialize(kryo, output);
}
@Override
public SVIntervalTree read( final Kryo kryo, final Input input, final Class> klass ) {
return new SVIntervalTree<>(kryo, input);
}
}
}