Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.liftover;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.*;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented.
* Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive.
*
* @author [email protected]
*/
public class LiftOver {
private static final Log LOG = Log.getInstance(LiftOver.class);
public static final double DEFAULT_LIFTOVER_MINMATCH = 0.95;
private double liftOverMinMatch = DEFAULT_LIFTOVER_MINMATCH;
private final OverlapDetector chains;
private final Map> contigMap = new HashMap<>();
private boolean logFailedIntervals = true;
private long totalFailedIntervalsBelowThreshold = 0L;
/**
* By default any lifted interval that falls below liftOverMinMatch
* will be logged. Set this to false to prevent logging.
* @param logFailedIntervals
*/
public void setShouldLogFailedIntervalsBelowThreshold(boolean logFailedIntervals) {
this.logFailedIntervals = logFailedIntervals;
}
/**
* Resets the internal counter that tracks intervals that failed liftover due to insufficient intersection length
*/
public void resetFailedIntervalsBelowThresholdCounter() {
this.totalFailedIntervalsBelowThreshold = 0L;
}
/**
*
* @return The total number of intervals that have failed liftover due to insufficient intersection length
*/
public long getFailedIntervalsBelowThreshold() {
return totalFailedIntervalsBelowThreshold;
}
/**
* Load UCSC chain file in order to lift over Intervals.
*/
public LiftOver(File chainFile){
this(Chain.loadChains(chainFile));
}
/**
* Load UCSC chain file in order to lift over Intervals.
*/
public LiftOver(InputStream chainFileInputStream, String sourceName) {
this(Chain.loadChains(new BufferedLineReader(chainFileInputStream), sourceName));
}
private LiftOver(OverlapDetector chains) {
this.chains = chains;
for (final Chain chain : this.chains.getAll()) {
final String from = chain.fromSequenceName;
final String to = chain.toSequenceName;
final Set names;
if (contigMap.containsKey(from)) {
names = contigMap.get(from);
}
else {
names = new HashSet<>();
contigMap.put(from, names);
}
names.add(to);
}
}
/**
* Throw an exception if all the "to" sequence names in the chains are not found in the given sequence dictionary.
*/
public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) {
for (final Chain chain : chains.getAll()) {
if (sequenceDictionary.getSequence(chain.toSequenceName) == null) {
throw new SAMException("Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary.");
}
}
}
/**
* Lift over the given interval to the new genome build using the liftOverMinMatch set for this
* LiftOver object.
* @param interval Interval to be lifted over.
* @return Interval in the output build coordinates, or null if it cannot be lifted over.
*/
public Interval liftOver(final Interval interval) {
return liftOver(interval, liftOverMinMatch);
}
/**
* Lift over the given interval to the new genome build.
* @param interval Interval to be lifted over.
* @param liftOverMinMatch Minimum fraction of bases that must remap.
* @return Interval in the output build coordinates, or null if it cannot be lifted over.
*/
public Interval liftOver(final Interval interval, final double liftOverMinMatch) {
if (interval.length() == 0) {
throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " +
interval.getName());
}
Chain chainHit = null;
TargetIntersection targetIntersection = null;
// Number of bases in interval that can be lifted over must be >= this.
double minMatchSize = liftOverMinMatch * interval.length();
// Find the appropriate Chain, and the part of the chain corresponding to the interval to be lifted over.
boolean hasOverlapBelowThreshold = false;
for (final Chain chain : chains.getOverlaps(interval)) {
final TargetIntersection candidateIntersection = targetIntersection(chain, interval);
if (candidateIntersection != null && candidateIntersection.intersectionLength >= minMatchSize) {
if (chainHit != null) {
// In basic liftOver, multiple hits are not allowed.
return null;
}
chainHit = chain;
targetIntersection = candidateIntersection;
} else if (candidateIntersection != null) {
hasOverlapBelowThreshold = true;
if (logFailedIntervals) {
LOG.info("Interval " + interval.getName() + " failed to match chain " + chain.id +
" because intersection length " + candidateIntersection.intersectionLength + " < minMatchSize "
+ minMatchSize +
" (" + (candidateIntersection.intersectionLength/(float)interval.length()) + " < " + liftOverMinMatch + ")");
}
}
}
if (chainHit == null) {
if (hasOverlapBelowThreshold) {
totalFailedIntervalsBelowThreshold++;
}
// Can't be lifted over.
return null;
}
return createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection);
}
public List diagnosticLiftover(final Interval interval) {
final List ret = new ArrayList();
if (interval.length() == 0) {
throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " +
interval.getName());
}
for (final Chain chain : chains.getOverlaps(interval)) {
Interval intersectingChain = interval.intersect(chain.interval);
final TargetIntersection targetIntersection = targetIntersection(chain, intersectingChain);
if (targetIntersection == null) {
ret.add(new PartialLiftover(intersectingChain, chain.id));
} else {
Interval toInterval = createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection);
float percentLiftedOver = targetIntersection.intersectionLength/(float)interval.length();
ret.add(new PartialLiftover(intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver));
}
}
return ret;
}
/**
* @return the set of destination contigs for each source contig in the chains file.
*/
public Map> getContigMap() {
return Collections.unmodifiableMap(contigMap);
}
private static Interval createToInterval(final String intervalName, final boolean sourceNegativeStrand, final TargetIntersection targetIntersection) {
// Compute the query interval given the offsets of the target interval start and end into the first and
// last ContinuousBlocks.
int toStart = targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart + targetIntersection.startOffset;
int toEnd = targetIntersection.chain.getBlock(targetIntersection.lastBlockIndex).getToEnd() - targetIntersection.offsetFromEnd;
if (toEnd <= toStart || toStart < 0) {
throw new SAMException("Something strange lifting over interval " + intervalName);
}
if (targetIntersection.chain.toOppositeStrand) {
// Flip if query is negative.
int negativeStart = targetIntersection.chain.toSequenceSize - toEnd;
int negativeEnd = targetIntersection.chain.toSequenceSize - toStart;
toStart = negativeStart;
toEnd = negativeEnd;
}
// Convert to 1-based, inclusive.
final boolean negativeStrand = targetIntersection.chain.toOppositeStrand ? !sourceNegativeStrand : sourceNegativeStrand;
return new Interval(targetIntersection.chain.toSequenceName, toStart+1, toEnd, negativeStrand, intervalName);
}
/**
* Add up overlap btw the blocks in this chain and the given interval.
* @return Length of overlap, offsets into first and last ContinuousBlocks, and indices of first and
* last ContinuousBlocks.
*/
private static TargetIntersection targetIntersection(final Chain chain, final Interval interval) {
int intersectionLength = 0;
// Convert interval to 0-based, half-open
int start = interval.getStart() - 1;
int end = interval.getEnd();
int firstBlockIndex = -1;
int lastBlockIndex = -1;
int startOffset = -1;
int offsetFromEnd = -1;
List blockList = chain.getBlocks();
for (int i = 0; i < blockList.size(); ++i) {
final Chain.ContinuousBlock block = blockList.get(i);
if (block.fromStart >= end) {
break;
} else if (block.getFromEnd() <= start) {
continue;
}
if (firstBlockIndex == -1) {
firstBlockIndex = i;
if (start > block.fromStart) {
startOffset = start - block.fromStart;
} else {
startOffset = 0;
}
}
lastBlockIndex = i;
if (block.getFromEnd() > end) {
offsetFromEnd = block.getFromEnd() - end;
} else {
offsetFromEnd = 0;
}
int thisIntersection = Math.min(end, block.getFromEnd()) - Math.max(start, block.fromStart);
if (thisIntersection <= 0) {
throw new SAMException("Should have been some intersection.");
}
intersectionLength += thisIntersection;
}
if (intersectionLength == 0) {
return null;
}
return new TargetIntersection(chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex);
}
/**
* Get minimum fraction of bases that must remap.
*/
public double getLiftOverMinMatch() {
return liftOverMinMatch;
}
/**
* Set minimum fraction of bases that must remap.
*/
public void setLiftOverMinMatch(final double liftOverMinMatch) {
this.liftOverMinMatch = liftOverMinMatch;
}
/**
* Value class returned by targetIntersection()
*/
private static class TargetIntersection {
/** Chain used for this intersection */
final Chain chain;
/** Total intersectionLength length */
final int intersectionLength;
/** Offset of target interval start in first block. */
final int startOffset;
/** Distance from target interval end to end of last block. */
final int offsetFromEnd;
/** Index of first ContinuousBlock matching interval. */
final int firstBlockIndex;
/** Index of last ContinuousBlock matching interval. */
final int lastBlockIndex;
TargetIntersection(final Chain chain,final int intersectionLength, final int startOffset,
final int offsetFromEnd, final int firstBlockIndex, final int lastBlockIndex) {
this.chain = chain;
this.intersectionLength = intersectionLength;
this.startOffset = startOffset;
this.offsetFromEnd = offsetFromEnd;
this.firstBlockIndex = firstBlockIndex;
this.lastBlockIndex = lastBlockIndex;
}
}
/**
* Represents a portion of a liftover operation, for use in diagnosing liftover failures.
*/
public static class PartialLiftover {
/** Intersection between "from" interval and "from" region of a chain. */
final Interval fromInterval;
/**
* Result of lifting over fromInterval (with no percentage mapped requirement). This is null
* if fromInterval falls entirely with a gap of the chain. */
final Interval toInterval;
/** id of chain used for this liftover */
final int chainId;
/** Percentage of bases in fromInterval that lifted over. 0 if fromInterval is not covered by any chain. */
final float percentLiftedOver;
PartialLiftover(final Interval fromInterval, final Interval toInterval, final int chainId, final float percentLiftedOver) {
this.fromInterval = fromInterval;
this.toInterval = toInterval;
this.chainId = chainId;
this.percentLiftedOver = percentLiftedOver;
}
PartialLiftover(final Interval fromInterval, final int chainId) {
this.fromInterval = fromInterval;
this.toInterval = null;
this.chainId = chainId;
this.percentLiftedOver = 0.0f;
}
public String toString() {
if (toInterval == null) {
// Matched a chain, but entirely within a gap.
return fromInterval.toString() + " (len " + fromInterval.length() + ")=>null using chain " + chainId;
}
final String strand = toInterval.getStrand().encode();
return fromInterval.toString() + " (len " + fromInterval.length() + ")=>" + toInterval + "(" + strand
+ ") using chain " + chainId + " ; pct matched " + percentLiftedOver;
}
}
}