Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* The MIT License
*
* Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextHeaderCodec;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
/**
* Represents a list of intervals against a reference sequence that can be written to
* and read from a file. The file format is relatively simple and reflects the SAM
* alignment format to a degree.
*
* A SAM style header must be present in the file which lists the sequence records
* against which the intervals are described. After the header the file then contains
* records one per line in text format with the following values tab-separated:
* Sequence name,
* Start position (1-based),
* End position (1-based, end inclusive),
* Strand (either + or -),
* Interval name (an, ideally unique, name for the interval),
*
* @author Tim Fennell
* @author Yossi Farjoun
*/
public class IntervalList implements Iterable {
public static final String INTERVAL_LIST_FILE_EXTENSION = ".interval_list";
private final SAMFileHeader header;
private final List intervals = new ArrayList();
private static final Log log = Log.getInstance(IntervalList.class);
/** Constructs a new interval list using the supplied header information. */
public IntervalList(final SAMFileHeader header) {
if (header == null) throw new IllegalArgumentException("SAMFileHeader must be supplied.");
this.header = header;
}
/** Constructs a new interval list using the supplied header information. */
public IntervalList(final SAMSequenceDictionary dict) {
this(new SAMFileHeader(dict));
}
/** Gets the header (if there is one) for the interval list. */
public SAMFileHeader getHeader() { return header; }
/** Returns an iterator over the intervals. */
@Override
public Iterator iterator() { return this.intervals.iterator(); }
/** Adds an interval to the list of intervals. */
public void add(final Interval interval) {
if (header.getSequence(interval.getContig()) == null) {
throw new IllegalArgumentException(String.format("Cannot add interval %s, contig not in header", interval.toString()));
}
this.intervals.add(interval);
}
/** Adds a Collection of intervals to the list of intervals. */
public void addall(final Collection intervals) {
//use this instead of addAll so that the contig checking happens.
for (Interval interval : intervals) {
add(interval);
}
}
/**
* Sorts the internal collection of intervals by coordinate.
*
* Note: this function modifies the object in-place and is therefore difficult to work with.
*
* @deprecated use {@link #sorted()} instead.
*/
@Deprecated
public void sort() {
Collections.sort(this.intervals, new IntervalCoordinateComparator(this.header));
this.header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
}
/** Returns a new IntervalList where each interval is padded by the specified amount of bases. */
public IntervalList padded(final int before, final int after) {
if (before < 0 || after < 0) throw new IllegalArgumentException("Padding values must be >= 0.");
final IntervalList padded = new IntervalList(this.getHeader().clone());
final SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();
for (final Interval i : this) {
final SAMSequenceRecord seq = dict.getSequence(i.getContig());
final int start = Math.max(1, i.getStart() - before);
final int end = Math.min(seq.getSequenceLength(), i.getEnd() + after);
padded.add(new Interval(i.getContig(), start, end, i.isNegativeStrand(), i.getName()));
}
return padded;
}
/** Returns a new IntervalList where each interval is padded by 'padding' bases on each side. */
public IntervalList padded(final int padding) {
return padded(padding, padding);
}
/** returns an independent sorted IntervalList*/
public IntervalList sorted() {
final IntervalList sorted = IntervalList.copyOf(this);
Collections.sort(sorted.intervals, new IntervalCoordinateComparator(sorted.header));
sorted.header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
return sorted;
}
/** Returned an independent IntervalList that is sorted and uniquified. */
public IntervalList uniqued() {
return uniqued(true);
}
/**
* Returned an independent IntervalList that is sorted and uniquified.
* @param concatenateNames If false, interval names are not concatenated when merging intervals to save space.
*/
public IntervalList uniqued(final boolean concatenateNames) {
final List tmp = getUniqueIntervals(sorted(), concatenateNames);
final IntervalList value = new IntervalList(this.header.clone());
value.intervals.addAll(tmp);
return value;
}
/**
* Sorts and uniques the list of intervals held within this interval list.
*
* Note: this function modifies the object in-place and is therefore difficult to work with.
*
* @deprecated use {@link #uniqued()} instead.
*/
@Deprecated
public void unique() {
unique(true);
}
/**
* Sorts and uniques the list of intervals held within this interval list.
*
* Note: this function modifies the object in-place and is therefore difficult to work with.
*
* @param concatenateNames If false, interval names are not concatenated when merging intervals to save space.
* @deprecated use {@link #uniqued(boolean)} instead.
*/
@Deprecated
public void unique(final boolean concatenateNames) {
sort();
final List tmp = getUniqueIntervals(concatenateNames);
this.intervals.clear();
this.intervals.addAll(tmp);
}
/** Gets the set of intervals as held internally. */
public List getIntervals() {
return Collections.unmodifiableList(this.intervals);
}
/**
* Merges the list of intervals and then reduces them down where regions overlap
* or are directly adjacent to one another. During this process the "merged" interval
* will retain the strand and name of the 5' most interval merged.
*
* Note: has the side-effect of sorting the stored intervals in coordinate order if not already sorted.
*
* Note: this function modifies the object in-place and is therefore difficult to work with.
*
* @return the set of unique intervals condensed from the contained intervals
* @deprecated use {@link #uniqued()#getIntervals()} instead.
*/
@Deprecated
public List getUniqueIntervals() {
return getUniqueIntervals(true);
}
//NO SIDE EFFECTS HERE!
/**
* Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals()
* @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
*/
public static List getUniqueIntervals(final IntervalList list, final boolean concatenateNames) {
return getUniqueIntervals(list, concatenateNames, false);
}
//NO SIDE EFFECTS HERE!
/**
* Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals()
* @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
* @param enforceSameStrands enforce that merged intervals have the same strand, otherwise ignore.
*/
public static List getUniqueIntervals(final IntervalList list, final boolean concatenateNames, final boolean enforceSameStrands) {
final List intervals;
if (list.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
intervals = list.sorted().intervals;
}
else {
intervals = list.intervals;
}
final List unique = new ArrayList();
final TreeSet toBeMerged = new TreeSet();
Interval current = null;
for (final Interval next : intervals) {
if (current == null) {
toBeMerged.add(next);
current = next;
}
else if (current.intersects(next) || current.abuts(next)) {
if (enforceSameStrands && current.isNegativeStrand() != next.isNegativeStrand()) throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString());
toBeMerged.add(next);
current = new Interval(current.getContig(), current.getStart(), Math.max(current.getEnd(), next.getEnd()), current.isNegativeStrand(), null);
}
else {
// Emit merged/unique interval
unique.add(merge(toBeMerged, concatenateNames));
// Set current == next for next iteration
toBeMerged.clear();
current = next;
toBeMerged.add(current);
}
}
if (!toBeMerged.isEmpty()) unique.add(merge(toBeMerged, concatenateNames));
return unique;
}
/**
* Merges list of intervals and reduces them like {@link #getUniqueIntervals()}.
*
* Note: this function modifies the object in-place and is therefore difficult to work with.
*
* @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
* @deprecated use {@link #uniqued(boolean)#getIntervals()} or {@link #getUniqueIntervals(IntervalList, boolean)} instead.
*/
@Deprecated
public List getUniqueIntervals(final boolean concatenateNames) {
if (getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
sort();
}
return getUniqueIntervals(this, concatenateNames);
}
/**
* Given a list of Intervals and a band multiple, this method will return a list of Intervals such that all of the intervals
* do not straddle integer multiples of that band.
*
* ex: if there is an interval (7200-9300) and the bandMultiple is 1000, the interval will be split into:
* (7200-7999, 8000-8999, 9000-9300)
* @param intervals A list of Interval
* @param bandMultiple integer value (> 0) to break up intervals in the list at integer multiples of
* @return list of intervals that are broken up
*/
public static List breakIntervalsAtBandMultiples(final List intervals, final int bandMultiple) {
final List brokenUpIntervals = new ArrayList();
for (final Interval interval : intervals) {
if (interval.getEnd() >= interval.getStart()) { // Normal, non-empty intervals
final int startIndex = interval.getStart() / bandMultiple;
final int endIndex = interval.getEnd() / bandMultiple;
if (startIndex == endIndex) {
brokenUpIntervals.add(interval);
} else {
brokenUpIntervals.addAll(breakIntervalAtBandMultiples(interval, bandMultiple));
}
}
else { // Special case - empty intervals ex: (100-99)
brokenUpIntervals.add(interval);
}
}
return brokenUpIntervals;
}
/**
* Given an Interval and a band multiple, this method will return a list of Intervals such that all of the intervals
* do not straddle integer multiples of that band.
*
* ex: if the interval is (7200-9300) and the bandMultiple is 1000, the interval will be split into:
* (7200-7999, 8000-8999, 9000-9300)
* @param interval an Interval
* @param bandMultiple integer value (> 0) to break up intervals in the list at integer multiples of
* @return list of intervals that are broken up
*/
private static List breakIntervalAtBandMultiples(final Interval interval, final int bandMultiple) {
final List brokenUpIntervals = new ArrayList();
int startPos = interval.getStart();
final int startOfIntervalIndex = startPos / bandMultiple;
int startIndex = startOfIntervalIndex;
final int endIndex = interval.getEnd() / bandMultiple;
while (startIndex <= endIndex) {
int endPos = (startIndex + 1) * bandMultiple -1;
if (endPos > interval.getEnd()) {
endPos = interval.getEnd();
}
// add start/end to list of broken up intervals to return (and uniquely name it).
brokenUpIntervals.add(new Interval(interval.getContig(), startPos, endPos, interval.isNegativeStrand(), interval.getName() + "." + (startIndex - startOfIntervalIndex + 1)));
startIndex++;
startPos = startIndex * bandMultiple;
}
return brokenUpIntervals;
}
/** Merges a sorted collection of intervals and optionally concatenates unique names or takes the first name. */
static Interval merge(final SortedSet intervals, final boolean concatenateNames) {
final String chrom = intervals.first().getContig();
int start = intervals.first().getStart();
int end = intervals.last().getEnd();
final boolean neg = intervals.first().isNegativeStrand();
final LinkedHashSet names = new LinkedHashSet();
final String name;
for (final Interval i : intervals) {
if (i.getName() != null) names.add(i.getName());
start = Math.min(start, i.getStart());
end = Math.max(end, i.getEnd());
}
if (names.isEmpty()) name = null;
else if (concatenateNames) name = StringUtil.join("|", names);
else name = names.iterator().next();
return new Interval(chrom, start, end, neg, name);
}
/** Gets the (potentially redundant) sum of the length of the intervals in the list. */
public long getBaseCount() {
return Interval.countBases(this.intervals);
}
/** Gets the count of unique bases represented by the intervals in the list. */
public long getUniqueBaseCount() {
return uniqued().getBaseCount();
}
/** Returns the count of intervals in the list. */
public int size() {
return this.intervals.size();
}
/** creates a independent copy of the given IntervalList
*
* @param list
* @return
*/
public static IntervalList copyOf(final IntervalList list){
final IntervalList clone = new IntervalList(list.header.clone());
clone.intervals.addAll(list.intervals);
return clone;
}
/**
* Parses an interval list from a file.
* @param file the file containing the intervals
* @return an IntervalList object that contains the headers and intervals from the file
*/
public static IntervalList fromFile(final File file) {
return fromPath(IOUtil.toPath(file));
}
/**
* Parses an interval list from a path.
* @param path the path containing the intervals
* @return an IntervalList object that contains the headers and intervals from the path
*/
public static IntervalList fromPath(final Path path) {
final BufferedReader reader = IOUtil.openFileForBufferedReading(path);
final IntervalList list = fromReader(reader);
try {
reader.close();
} catch (final IOException e) {
throw new SAMException(String.format("Failed to close file %s after reading", path.toUri().toString()));
}
return list;
}
/**
* Creates an IntervalList from the given sequence name
* @param header header to use to create IntervalList
* @param sequenceName name of sequence in header
* @return a new intervalList with given header that contains the reference name
*/
public static IntervalList fromName(final SAMFileHeader header, final String sequenceName) {
final IntervalList ref = new IntervalList(header);
ref.add(new Interval(sequenceName, 1, header.getSequence(sequenceName).getSequenceLength()));
return ref;
}
/**
* Calls {@link #fromFile(java.io.File)} on the provided files, and returns their {@link #union(java.util.Collection)}.
*/
public static IntervalList fromFiles(final Collection intervalListFiles) {
final Collection intervalLists = new ArrayList();
for (final File file : intervalListFiles) {
intervalLists.add(IntervalList.fromFile(file));
}
return IntervalList.union(intervalLists);
}
/**
* Parses an interval list from a reader in a stream based fashion.
* @param in a BufferedReader that can be read from
* @return an IntervalList object that contains the headers and intervals from the file
*/
public static IntervalList fromReader(final BufferedReader in) {
try {
// Setup a reader and parse the header
final StringBuilder builder = new StringBuilder(4096);
String line = null;
while ((line = in.readLine()) != null) {
if (line.startsWith("@")) {
builder.append(line).append('\n');
}
else {
break;
}
}
if (builder.length() == 0) {
throw new IllegalStateException("Interval list file must contain header. ");
}
final BufferedLineReader headerReader = BufferedLineReader.fromString(builder.toString());
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
final IntervalList list = new IntervalList(codec.decode(headerReader, "BufferedReader"));
final SAMSequenceDictionary dict = list.getHeader().getSequenceDictionary();
//there might not be any lines after the header, in which case we should return an empty list
if(line == null) return list;
// Then read in the intervals
final FormatUtil format = new FormatUtil();
do {
if (line.trim().isEmpty()) continue; // skip over blank lines
// Make sure we have the right number of fields
final String[] fields = line.split("\t");
if (fields.length != 5) {
throw new SAMException("Invalid interval record contains " +
fields.length + " fields: " + line);
}
// Then parse them out
final String seq = fields[0];
final int start = format.parseInt(fields[1]);
final int end = format.parseInt(fields[2]);
final boolean negative;
if (fields[3].equals("-")) negative = true;
else if (fields[3].equals("+")) negative = false;
else throw new IllegalArgumentException("Invalid strand field: " + fields[3]);
final String name = fields[4];
final Interval interval = new Interval(seq, start, end, negative, name);
if (dict.getSequence(seq) == null) {
log.warn("Ignoring interval for unknown reference: " + interval);
}
else {
list.intervals.add(interval);
}
}
while ((line = in.readLine()) != null);
return list;
}
catch (final IOException ioe) {
throw new SAMException("Error parsing interval list.", ioe);
}
finally {
try { in.close(); } catch (final Exception e) { /* do nothing */ }
}
}
/**
* Writes out the list of intervals to the supplied file.
* @param file a file to write to. If exists it will be overwritten.
*/
public void write(final File file) {
try {
final BufferedWriter out = IOUtil.openFileForBufferedWriting(file);
final FormatUtil format = new FormatUtil();
// Write out the header
if (this.header != null) {
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
codec.encode(out, this.header);
}
// Write out the intervals
for (final Interval interval : this) {
out.write(interval.getContig());
out.write('\t');
out.write(format.format(interval.getStart()));
out.write('\t');
out.write(format.format(interval.getEnd()));
out.write('\t');
out.write(interval.isPositiveStrand() ? '+' : '-');
out.write('\t');
if(interval.getName() != null){
out.write(interval.getName());
}
else{
out.write(".");
}
out.newLine();
}
out.flush();
out.close();
}
catch (final IOException ioe) {
throw new SAMException("Error writing out interval list to file: " + file.getAbsolutePath(), ioe);
}
}
/**
* A utility function for generating the intersection of two IntervalLists, checks for equal dictionaries.
*
* @param list1 the first IntervalList
* @param list2 the second IntervalList
* @return the intersection of list1 and list2.
*/
public static IntervalList intersection(final IntervalList list1, final IntervalList list2) {
final IntervalList result;
// Ensure that all the sequence dictionaries agree and merge the lists
SequenceUtil.assertSequenceDictionariesEqual(list1.getHeader().getSequenceDictionary(),
list2.getHeader().getSequenceDictionary());
result = new IntervalList(list1.getHeader().clone());
final OverlapDetector detector = new OverlapDetector(0, 0);
detector.addAll(list1.getIntervals(), list1.getIntervals());
for (final Interval i : list2.getIntervals()) {
final Collection as = detector.getOverlaps(i);
for (final Interval j : as) {
final Interval tmp = i.intersect(j);
result.add(tmp);
}
}
return result.uniqued();
}
/**
* A utility function for intersecting a list of IntervalLists, checks for equal dictionaries.
*
* @param lists the list of IntervalList
* @return the intersection of all the IntervalLists in lists.
*/
public static IntervalList intersection(final Collection lists) {
IntervalList intersection = null;
for (final IntervalList list : lists) {
if(intersection == null){
intersection = list;
}
else{
intersection = intersection(intersection, list);
}
}
return intersection;
}
/**
* A utility function for merging a list of IntervalLists, checks for equal dictionaries.
* Merging does not look for overlapping intervals nor uniquify
*
* @param lists a list of IntervalList
* @return the union of all the IntervalLists in lists.
*/
public static IntervalList concatenate(final Collection lists) {
if(lists.isEmpty()){
throw new SAMException("Cannot concatenate an empty list of IntervalLists.");
}
// Ensure that all the sequence dictionaries agree and merge the lists
final SAMFileHeader header = lists.iterator().next().getHeader().clone();
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
final IntervalList merged = new IntervalList(header);
for (final IntervalList in : lists) {
SequenceUtil.assertSequenceDictionariesEqual(merged.getHeader().getSequenceDictionary(),
in.getHeader().getSequenceDictionary());
merged.addall(in.intervals);
}
return merged;
}
/**
* A utility function for finding the union of a list of IntervalLists, checks for equal dictionaries.
* also looks for overlapping intervals, uniquifies, and sorts (by coordinate)
*
* @param lists the list of IntervalList
* @return the union of all the IntervalLists in lists.
*/
public static IntervalList union(final Collection lists) {
final IntervalList merged = concatenate(lists);
return merged.uniqued();
}
public static IntervalList union(final IntervalList list1, final IntervalList list2) {
final Collection duo = CollectionUtil.makeList(list1, list2);
return IntervalList.union(duo);
}
/** inverts an IntervalList and returns one that has exactly all the bases in the dictionary that the original one does not.
*
* @param list an IntervalList
* @return an IntervalList that is complementary to list
*/
public static IntervalList invert(final IntervalList list) {
final IntervalList inverse = new IntervalList(list.header.clone());
final ListMap map = new ListMap();
//add all the intervals (uniqued and therefore also sorted) to a ListMap from sequenceIndex to a list of Intervals
for(final Interval i : list.uniqued().getIntervals()){
map.add(list.getHeader().getSequenceIndex(i.getContig()),i);
}
// a counter to supply newly-created intervals with a name
int intervals = 0;
//iterate over the contigs in the dictionary
for (final SAMSequenceRecord samSequenceRecord : list.getHeader().getSequenceDictionary().getSequences()) {
final Integer sequenceIndex = samSequenceRecord.getSequenceIndex();
final String sequenceName = samSequenceRecord.getSequenceName();
final int sequenceLength = samSequenceRecord.getSequenceLength();
Integer lastCoveredPosition = 0; //start at beginning of sequence
//iterate over list of intervals that are in sequence
if (map.containsKey(sequenceIndex)) // if there are intervals in the ListMap on this contig, iterate over them (in order)
for (final Interval i : map.get(sequenceIndex)) {
if (i.getStart() > lastCoveredPosition + 1) //if there's space between the last interval and the current one, add an interval between them
inverse.add(new Interval(sequenceName, lastCoveredPosition + 1, i.getStart() - 1, false, "interval-" + (++intervals)));
lastCoveredPosition = i.getEnd(); //update the last covered position
}
//finally, if there's room between the last covered position and the end of the sequence, add an interval
if (sequenceLength > lastCoveredPosition) //if there's space between the last interval and the next
// one, add an interval. This also covers the case that there are no intervals in the ListMap for a contig.
inverse.add(new Interval(sequenceName, lastCoveredPosition + 1, sequenceLength, false, "interval-" + (++intervals)));
}
return inverse;
}
/**
* A utility function for subtracting a collection of IntervalLists from another. Resulting loci are those that are in the first collection
* but not the second.
*
* @param lhs the collection of IntervalList from which to subtract intervals
* @param rhs the collection of intervals to subtract
* @return an IntervalList comprising all loci that are in the first collection but not the second lhs-rhs=answer.
*/
public static IntervalList subtract(final Collection lhs, final Collection rhs) {
return intersection(
union(lhs),
invert(union(rhs)));
}
/**
* A utility function for subtracting a single IntervalList from another. Resulting loci are those that are in the first List
* but not the second.
*
* @param lhs the IntervalList from which to subtract intervals
* @param rhs the IntervalList to subtract
* @return an IntervalList comprising all loci that are in first IntervalList but not the second. lhs-rhs=answer
*/
public static IntervalList subtract(final IntervalList lhs, final IntervalList rhs) {
return subtract(Collections.singletonList(lhs),
Collections.singletonList(rhs));
}
/**
* A utility function for finding the difference between two IntervalLists.
*
* @param lists1 the first collection of IntervalLists
* @param lists2 the second collection of IntervalLists
* @return the difference between the two intervals, i.e. the loci that are only in one IntervalList but not both
*/
public static IntervalList difference(final Collection lists1, final Collection lists2) {
return union(
subtract(lists1, lists2),
subtract(lists2, lists1));
}
/**
* A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval
* in the second list.
*
* @param lhs the first collection of IntervalLists
* @param lhs the second collection of IntervalLists
* @return an IntervalList comprising of all intervals in the first IntervalList that have at least 1bp overlap with
* any interval in the second.
*/
public static IntervalList overlaps(final IntervalList lhs, final IntervalList rhs) {
return overlaps(Collections.singletonList(lhs), Collections.singletonList(rhs));
}
/**
* A utility function for finding the intervals in the first list that have at least 1bp overlap with any interval
* in the second list.
*
* @param lists1 the first collection of IntervalLists
* @param lists2 the second collection of IntervalLists
* @return an IntervalList comprising of all intervals in the first collection of lists that have at least 1bp
* overlap with any interval in the second lists.
*/
public static IntervalList overlaps(final Collection lists1, final Collection lists2) {
if(lists1.isEmpty()){
throw new SAMException("Cannot call overlaps with the first collection having empty list of IntervalLists.");
}
final SAMFileHeader header = lists1.iterator().next().getHeader().clone();
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
// Create an overlap detector on list2
final IntervalList overlapIntervals = new IntervalList(header);
for (final IntervalList list : lists2) {
SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
list.getHeader().getSequenceDictionary());
overlapIntervals.addall(list.getIntervals());
}
final OverlapDetector detector = new OverlapDetector<>(0, 0);
final int dummy = -1; // NB: since we don't actually use the returned objects, we can use a dummy value
for (final Interval interval : overlapIntervals.sorted().uniqued()) {
detector.addLhs(dummy, interval);
}
// Go through each input interval in in lists1 and see if overlaps any interval in lists2
final IntervalList merged = new IntervalList(header);
for (final IntervalList list : lists1) {
SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
list.getHeader().getSequenceDictionary());
for (final Interval interval : list.getIntervals()) {
if (detector.overlapsAny(interval)) {
merged.add(interval);
}
}
}
return merged;
}
@Override
public boolean equals(final Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final IntervalList intervals1 = (IntervalList) o;
return header.equals(intervals1.header) && intervals.equals(intervals1.intervals);
}
@Override
public int hashCode() {
int result = header.hashCode();
result = 31 * result + intervals.hashCode();
return result;
}
}
/**
* Comparator that orders intervals based on their sequence index, by coordinate
* then by strand and finally by name.
*/
class IntervalCoordinateComparator implements Comparator, Serializable {
private static final long serialVersionUID = 1L;
private final SAMFileHeader header;
/** Constructs a comparator using the supplied sequence header. */
IntervalCoordinateComparator(final SAMFileHeader header) {
this.header = header;
}
@Override
public int compare(final Interval lhs, final Interval rhs) {
final int lhsIndex = this.header.getSequenceIndex(lhs.getContig());
final int rhsIndex = this.header.getSequenceIndex(rhs.getContig());
int retval = lhsIndex - rhsIndex;
if (retval == 0) retval = lhs.getStart() - rhs.getStart();
if (retval == 0) retval = lhs.getEnd() - rhs.getEnd();
if (retval == 0) {
if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) retval = -1;
else if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) retval = 1;
}
if (retval == 0) {
if (lhs.getName() == null) {
if (rhs.getName() == null) return 0;
else return -1;
} else if (rhs.getName() == null) {
return 1;
}
else {
return lhs.getName().compareTo(rhs.getName());
}
}
return retval;
}
}