All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.samtools.liftover.LiftOver Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package htsjdk.samtools.liftover;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Java port of UCSC liftOver.  Only the most basic liftOver functionality is implemented.
 * Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive.
 *
 * @author [email protected]
 */
public class LiftOver {
    private static final Log LOG = Log.getInstance(LiftOver.class);
    
    public static final double DEFAULT_LIFTOVER_MINMATCH = 0.95;

    private double liftOverMinMatch = DEFAULT_LIFTOVER_MINMATCH;
    private final OverlapDetector chains;
    private final Map> contigMap = new HashMap<>();

    private boolean logFailedIntervals = true;
    private long totalFailedIntervalsBelowThreshold = 0L;

    /**
     * By default any lifted interval that falls below liftOverMinMatch
     * will be logged.  Set this to false to prevent logging.
     * @param logFailedIntervals
     */
    public void setShouldLogFailedIntervalsBelowThreshold(boolean logFailedIntervals) {
        this.logFailedIntervals = logFailedIntervals;
    }

    /**
     * Resets the internal counter that tracks intervals that failed liftover due to insufficient intersection length
     */
    public void resetFailedIntervalsBelowThresholdCounter() {
        this.totalFailedIntervalsBelowThreshold = 0L;
    }

    /**
     *
     * @return The total number of intervals that have failed liftover due to insufficient intersection length
     */
    public long getFailedIntervalsBelowThreshold() {
        return totalFailedIntervalsBelowThreshold;
    }

    /**
     * Load UCSC chain file in order to lift over Intervals.
     */
    public LiftOver(File chainFile) {
        IOUtil.assertFileIsReadable(chainFile);
        chains = Chain.loadChains(chainFile);

        for (final Chain chain : this.chains.getAll()) {
            final String from = chain.fromSequenceName;
            final String to   = chain.toSequenceName;
            final Set names;
            if (contigMap.containsKey(from)) {
                names = contigMap.get(from);
            }
            else {
                names = new HashSet<>();
                contigMap.put(from, names);
            }
            names.add(to);
        }
    }

    /**
     * Throw an exception if all the "to" sequence names in the chains are not found in the given sequence dictionary.
     */
    public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) {
        for (final Chain chain : chains.getAll()) {
            if (sequenceDictionary.getSequence(chain.toSequenceName) == null) {
                throw new SAMException("Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary.");
            }
        }

    }

    /**
     * Lift over the given interval to the new genome build using the liftOverMinMatch set for this
     * LiftOver object.
     * @param interval Interval to be lifted over.
     * @return Interval in the output build coordinates, or null if it cannot be lifted over.
     */
    public Interval liftOver(final Interval interval) {
        return liftOver(interval, liftOverMinMatch);
    }

    /**
     * Lift over the given interval to the new genome build.
     * @param interval Interval to be lifted over.
     * @param liftOverMinMatch Minimum fraction of bases that must remap.
     * @return Interval in the output build coordinates, or null if it cannot be lifted over.
     */
    public Interval liftOver(final Interval interval, final double liftOverMinMatch) {
        if (interval.length() == 0) {
            throw new IllegalArgumentException("Zero-length interval cannot be lifted over.  Interval: " +
                    interval.getName());
        }
        Chain chainHit = null;
        TargetIntersection targetIntersection = null;
        // Number of bases in interval that can be lifted over must be >= this.
        double minMatchSize = liftOverMinMatch * interval.length();

        // Find the appropriate Chain, and the part of the chain corresponding to the interval to be lifted over.
        boolean hasOverlapBelowThreshold = false;
        for (final Chain chain : chains.getOverlaps(interval)) {
            final TargetIntersection candidateIntersection = targetIntersection(chain, interval);
            if (candidateIntersection != null && candidateIntersection.intersectionLength >= minMatchSize) {
                if (chainHit != null) {
                    // In basic liftOver, multiple hits are not allowed.
                    return null;
                }
                chainHit = chain;
                targetIntersection = candidateIntersection;
            } else if (candidateIntersection != null) {
                hasOverlapBelowThreshold = true;
                if (logFailedIntervals) {
                    LOG.info("Interval " + interval.getName() + " failed to match chain " + chain.id +
                            " because intersection length " + candidateIntersection.intersectionLength + " < minMatchSize "
                            + minMatchSize +
                            " (" + (candidateIntersection.intersectionLength/(float)interval.length()) + " < " + liftOverMinMatch + ")");
                }
            }
        }
        if (chainHit == null) {
            if (hasOverlapBelowThreshold) {
                totalFailedIntervalsBelowThreshold++;
            }

            // Can't be lifted over.
            return null;
        }

        return createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection);
    }

    public List diagnosticLiftover(final Interval interval) {
        final List ret = new ArrayList();
        if (interval.length() == 0) {
            throw new IllegalArgumentException("Zero-length interval cannot be lifted over.  Interval: " +
                    interval.getName());
        }
        for (final Chain chain : chains.getOverlaps(interval)) {
            Interval intersectingChain = interval.intersect(chain.interval);
            final TargetIntersection targetIntersection = targetIntersection(chain, intersectingChain);
            if (targetIntersection == null) {
                ret.add(new PartialLiftover(intersectingChain, chain.id));
            } else {
                Interval toInterval = createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection);
                float percentLiftedOver = targetIntersection.intersectionLength/(float)interval.length();
                ret.add(new PartialLiftover(intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver));
            }
        }
        return ret;
    }

    /**
     * @return the set of destination contigs for each source contig in the chains file.
     */
    public Map> getContigMap() {
        return Collections.unmodifiableMap(contigMap);
    }

    private static Interval createToInterval(final String intervalName, final boolean sourceNegativeStrand, final TargetIntersection targetIntersection) {
        // Compute the query interval given the offsets of the target interval start and end into the first and
        // last ContinuousBlocks.
        int toStart = targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart + targetIntersection.startOffset;
        int toEnd = targetIntersection.chain.getBlock(targetIntersection.lastBlockIndex).getToEnd() - targetIntersection.offsetFromEnd;
        if (toEnd <= toStart || toStart < 0) {
            throw new SAMException("Something strange lifting over interval " + intervalName);
        }

        if (targetIntersection.chain.toOppositeStrand) {
            // Flip if query is negative.
            int negativeStart = targetIntersection.chain.toSequenceSize - toEnd;
            int negativeEnd = targetIntersection.chain.toSequenceSize - toStart;
            toStart = negativeStart;
            toEnd = negativeEnd;
        }
        // Convert to 1-based, inclusive.
        final boolean negativeStrand = targetIntersection.chain.toOppositeStrand ? !sourceNegativeStrand : sourceNegativeStrand;
        return new Interval(targetIntersection.chain.toSequenceName, toStart+1, toEnd, negativeStrand, intervalName);
    }

    /**
     * Add up overlap btw the blocks in this chain and the given interval.
     * @return Length of overlap, offsets into first and last ContinuousBlocks, and indices of first and
     * last ContinuousBlocks.
     */
    private static TargetIntersection targetIntersection(final Chain chain, final Interval interval) {
        int intersectionLength = 0;
        // Convert interval to 0-based, half-open
        int start = interval.getStart() - 1;
        int end = interval.getEnd();
        int firstBlockIndex = -1;
        int lastBlockIndex = -1;
        int startOffset = -1;
        int offsetFromEnd = -1;
        List blockList = chain.getBlocks();
        for (int i = 0; i < blockList.size(); ++i) {
            final Chain.ContinuousBlock block = blockList.get(i);
            if (block.fromStart >= end) {
                break;
            } else if (block.getFromEnd() <= start) {
                continue;
            }
            if (firstBlockIndex == -1) {
                firstBlockIndex = i;
                if (start > block.fromStart) {
                    startOffset = start - block.fromStart;
                } else {
                    startOffset = 0;
                }
            }
            lastBlockIndex = i;
            if (block.getFromEnd() > end) {
                offsetFromEnd = block.getFromEnd() - end;
            } else {
                offsetFromEnd = 0;
            }
            int thisIntersection = Math.min(end, block.getFromEnd()) - Math.max(start, block.fromStart);
            if (thisIntersection <= 0) {
                throw new SAMException("Should have been some intersection.");
            }
            intersectionLength += thisIntersection;
        }
        if (intersectionLength == 0) {
            return null;
        }
        return new TargetIntersection(chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex);
    }

    /**
     * Get minimum fraction of bases that must remap.
     */
    public double getLiftOverMinMatch() {
        return liftOverMinMatch;
    }

    /**
     * Set minimum fraction of bases that must remap.
     */
    public void setLiftOverMinMatch(final double liftOverMinMatch) {
        this.liftOverMinMatch = liftOverMinMatch;
    }

    /**
    * Value class returned by targetIntersection()
    */
    private static class TargetIntersection {
        /** Chain used for this intersection */
        final Chain chain;
        /** Total intersectionLength length */
        final int intersectionLength;
        /** Offset of target interval start in first block. */
        final int startOffset;
        /** Distance from target interval end to end of last block. */
        final int offsetFromEnd;
        /** Index of first ContinuousBlock matching interval. */
        final int firstBlockIndex;
        /** Index of last ContinuousBlock matching interval. */
        final int lastBlockIndex;

        TargetIntersection(final Chain chain,final int intersectionLength, final int startOffset,
                           final int offsetFromEnd, final int firstBlockIndex, final int lastBlockIndex) {
            this.chain = chain;
            this.intersectionLength = intersectionLength;
            this.startOffset = startOffset;
            this.offsetFromEnd = offsetFromEnd;
            this.firstBlockIndex = firstBlockIndex;
            this.lastBlockIndex = lastBlockIndex;
        }
    }

    /**
     * Represents a portion of a liftover operation, for use in diagnosing liftover failures.
     */
    public static class PartialLiftover {
        /** Intersection between "from" interval and "from" region of a chain. */
        final Interval fromInterval;
        /**
         * Result of lifting over fromInterval (with no percentage mapped requirement).  This is null
         * if fromInterval falls entirely with a gap of the chain. */
        final Interval toInterval;
        /** id of chain used for this liftover */
        final int chainId;
        /** Percentage of bases in fromInterval that lifted over.  0 if fromInterval is not covered by any chain. */
        final float percentLiftedOver;

        PartialLiftover(final Interval fromInterval, final Interval toInterval, final int chainId, final float percentLiftedOver) {
            this.fromInterval = fromInterval;
            this.toInterval = toInterval;
            this.chainId = chainId;
            this.percentLiftedOver = percentLiftedOver;
        }

        PartialLiftover(final Interval fromInterval, final int chainId) {
            this.fromInterval = fromInterval;
            this.toInterval = null;
            this.chainId = chainId;
            this.percentLiftedOver = 0.0f;
        }

        public String toString() {
            if (toInterval == null) {
                // Matched a chain, but entirely within a gap.
                return fromInterval.toString() + " (len " + fromInterval.length() + ")=>null using chain " + chainId;
            }
            final String strand = toInterval.getStrand().encode();
            return fromInterval.toString() + " (len " + fromInterval.length() + ")=>" + toInterval + "(" + strand
                    + ") using chain " + chainId + " ; pct matched " + percentLiftedOver;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy