picard.vcf.LiftoverVcf Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of picard Show documentation
A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF.
There is a newer version: 3.2.0
Show newest version
package picard.vcf;

/*
 * The MIT License
 *
 * Copyright (c) 2017 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.*;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.*;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.VariantManipulationProgramGroup;
import picard.util.LiftoverUtils;

import java.io.File;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Summary
 * Tool for "lifting over" a VCF from one genome build to another, producing a properly headered,
 * sorted and indexed VCF in one go.
 *
 * Details
 * This tool adjusts the coordinates of variants within a VCF file to match a new reference. The
 * output file will be sorted and indexed using the target reference build. To be clear, {@link #REFERENCE_SEQUENCE} should be the
 * target reference build (that is, the "new" one). The tool is based on the UCSC LiftOver tool
 * and uses a UCSC chain file to guide its operation. 

 *
 * For each variant, the tool will look for the target coordinate, reverse-complement and left-align the variant if needed,
 * and, in the case that the reference and alternate alleles of a SNP have been swapped in the new genome build, it will
 * adjust the SNP, and correct AF-like INFO fields and the relevant genotypes.
 * 

 *
 * Example
 *  * java -jar picard.jar LiftoverVcf \\
 *     I=input.vcf \\
 *     O=lifted_over.vcf \\
 *     CHAIN=b37tohg38.chain \\
 *     REJECT=rejected_variants.vcf \\
 *     R=reference_sequence.fasta
 * 
 * Caveats
 * Rejected Records
 * Records may be rejected because they cannot be lifted over or because of sequence incompatibilities between the
 * source and target reference genomes.  Rejected records will be emitted to the {@link #REJECT} file using the source
 * genome build coordinates. The reason for the rejection will be stated in the FILTER field, and more detail may be placed
 * in the INFO field.
 * Memory Use
 * LiftOverVcf sorts the output using a {@link htsjdk.samtools.util.SortingCollection} which relies on {@link #MAX_RECORDS_IN_RAM}
 * to specify how many (vcf) records to hold in memory before "spilling" to disk. The default value is reasonable when sorting SAM files,
 * but not for VCFs as there is no good default due to the dependence on the number of samples and amount of information in the INFO and FORMAT
 * fields. Consider lowering to 100,000 or even less if you have many genotypes.
 *
 * @author Tim Fennell
 */
@CommandLineProgramProperties(
        summary = LiftoverVcf.USAGE_SUMMARY + LiftoverVcf.USAGE_DETAILS,
        oneLineSummary = LiftoverVcf.USAGE_SUMMARY,
        programGroup = VariantManipulationProgramGroup.class)
@DocumentedFeature
public class LiftoverVcf extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Lifts over a VCF file from one reference build to another.  ";
    static final String USAGE_DETAILS = "Summary\n" +
            "Tool for \"lifting over\" a VCF from one genome build to another, producing a properly headered, " +
            "sorted and indexed VCF in one go.\n" +
            "\n" +
            "Details\n" +
            "This tool adjusts the coordinates of variants within a VCF file to match a new reference. The " +
            "output file will be sorted and indexed using the target reference build. To be clear, REFERENCE_SEQUENCE should be the " +
            "target reference build (that is, the \"new\" one). The tool is based on the UCSC LiftOver tool (see http://genome.ucsc.edu/cgi-bin/hgLiftOver) " +
            "and uses a UCSC chain file to guide its operation.\n" +
            "\n" +
            "For each variant, the tool will look for the target coordinate, reverse-complement and left-align the variant if needed, " +
            "and, in the case that the reference and alternate alleles of a SNP have been swapped in the new genome build, it will " +
            "adjust the SNP, and correct AF-like INFO fields and the relevant genotypes." +
            "\n" +
            "\n" +
            "Example\n" +
            "java -jar picard.jar LiftoverVcf \\\n" +
            "    I=input.vcf \\\n" +
            "    O=lifted_over.vcf \\\n" +
            "    CHAIN=b37tohg38.chain \\\n" +
            "    REJECT=rejected_variants.vcf \\\n" +
            "    R=reference_sequence.fasta\n" +
            "\n" +
            "Caveats\n" +
            "Rejected Records\n" +
            "Records may be rejected because they cannot be lifted over or because of sequence incompatibilities between the " +
            "source and target reference genomes.  Rejected records will be emitted to the REJECT file using the source " +
            "genome build coordinates. The reason for the rejection will be stated in the FILTER field, and more detail may be placed " +
            "in the INFO field.\n" +
            "Memory Use\n" +
            "LiftOverVcf sorts the output using a \"SortingCollection\" which relies on MAX_RECORDS_IN_RAM " +
            "to specify how many (vcf) records to hold in memory before \"spilling\" to disk. The default value is reasonable when sorting SAM files, " +
            "but not for VCFs as there is no good default due to the dependence on the number of samples and amount of information in the INFO and FORMAT " +
            "fields. Consider lowering to 100,000 or even less if you have many genotypes.\n";

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input VCF/BCF file to be lifted over.")
    public File INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output location for the lifted over VCF/BCF.")
    public File OUTPUT;

    @Argument(shortName = "C", doc = "The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description" +
            " of chain files.  See http://hgdownload.soe.ucsc.edu/downloads.html#terms for where to download chain files.")
    public File CHAIN;

    @Argument(doc = "File to which to write rejected records.")
    public File REJECT;

    // Option on whether or not to provide a warning, or error message and exit if a missing contig is encountered
    @Argument(shortName = "WMC", doc = "Warn on missing contig.", optional = true)
    public boolean WARN_ON_MISSING_CONTIG = false;

    @Argument(shortName = "LFI", doc = "If true, intervals failing due to match below LIFTOVER_MIN_MATCH will be logged as a warning to the console.", optional = true)
    public boolean LOG_FAILED_INTERVALS = true;

    // Option on whether or not to write the original contig/position of the variant to the INFO field
    @Argument(doc = "Write the original contig/position for lifted variants to the INFO field.", optional = true)
    public boolean WRITE_ORIGINAL_POSITION = false;

    // Option on whether or not to write the original alleles of the variant to the INFO field
    @Argument(doc = "Write the original alleles for lifted variants to the INFO field.  If the alleles are identical, this attribute will be omitted.", optional = true)
    public boolean WRITE_ORIGINAL_ALLELES = false;

    @Argument(doc = "The minimum percent match required for a variant to be lifted.", optional = true)
    public double LIFTOVER_MIN_MATCH = 1.0;

    @Argument(doc = "Allow INFO and FORMAT in the records that are not found in the header", optional = true)
    public boolean ALLOW_MISSING_FIELDS_IN_HEADER = false;


    @Argument(doc = "If the REF allele of the lifted site does not match the target genome, that variant is normally rejected. " +
            "For bi-allelic SNPs, if this is set to true and the ALT allele equals the new REF allele, the REF and ALT alleles will be swapped.  This can rescue " +
            "some variants; however, do this carefully as some annotations may become invalid, such as any that are alelle-specifc.  See also TAGS_TO_REVERSE and TAGS_TO_DROP.", optional = true)
    public boolean RECOVER_SWAPPED_REF_ALT = false;

    @Argument(doc = "INFO field annotations that behave like an Allele Frequency and should be transformed with x->1-x " +
            "when swapping reference with variant alleles.", optional = true)
    public Collection TAGS_TO_REVERSE = new ArrayList<>(LiftoverUtils.DEFAULT_TAGS_TO_REVERSE);

    @Argument(doc = "INFO field annotations that should be deleted when swapping reference with variant alleles.", optional = true)
    public Collection TAGS_TO_DROP = new ArrayList<>(LiftoverUtils.DEFAULT_TAGS_TO_DROP);

    @Argument(doc = "Output VCF file will be written on the fly but it won't be sorted and indexed.", optional = true)
    public boolean DISABLE_SORT = false;

    // When a contig used in the chain is not in the reference, exit with this value instead of 0.
    public static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;

    /**
     * Filter name to use when a target cannot be lifted over.
     */
    public static final String FILTER_CANNOT_LIFTOVER_REV_COMP = "CannotLiftOver";

    /**
     * Filter name to use when a target cannot be lifted over.
     */
    public static final String FILTER_NO_TARGET = "NoTarget";

    /**
     * Filter name to use when a target is lifted over, but the reference allele doesn't match the new reference.
     */
    public static final String FILTER_MISMATCHING_REF_ALLELE = "MismatchedRefAllele";

    /**
     * Filter name to use when an indel cannot be lifted over since it straddles two intervals in a chain which means
     * that it is unclear what are the right alleles to be used.
     */
    public static final String FILTER_INDEL_STRADDLES_TWO_INTERVALS = "IndelStraddlesMultipleIntevals";

    /**
     * Filters to be added to the REJECT file.
     */
    private static final List FILTERS = CollectionUtil.makeList(
            new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_REV_COMP, "Liftover of a variant that needed reverse-complementing failed for unknown reasons."),
            new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
            new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover."),
            new VCFFilterHeaderLine(FILTER_INDEL_STRADDLES_TWO_INTERVALS, "Reference allele in Indel is straddling multiple intervals in the chain, and so the results are not well defined.")
    );

    /**
     * Attribute used to store the name of the source contig/chromosome prior to liftover.
     */
    public static final String ORIGINAL_CONTIG = "OriginalContig";

    /**
     * Attribute used to store the position of the variant on the source contig prior to liftover.
     */
    public static final String ORIGINAL_START = "OriginalStart";

    /**
     * Attribute used to store the list of original alleles (including REF), in the original order prior to liftover.
     */
    public static final String ORIGINAL_ALLELES = "OriginalAlleles";

    /**
     * Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match.
     */
    public static final String ATTEMPTED_LOCUS = "AttemptedLocus";

    /**
     * Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match.
     */
    public static final String ATTEMPTED_ALLELES = "AttemptedAlleles";

    /**
     * Metadata to be added to the Passing file.
     */
    private static final List ATTRS = CollectionUtil.makeList(
            new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."),
            new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover."),
            new VCFInfoHeaderLine(ORIGINAL_ALLELES, VCFHeaderLineCount.R, VCFHeaderLineType.String, "A list of the original alleles (including REF) of the variant prior to liftover.  If the alleles were not changed during liftover, this attribute will be omitted.")
    );

    private VariantContextWriter rejectedRecords;
    /** the output VariantContextWriter */
    private VariantContextWriter acceptedRecords;
    private final Log log = Log.getInstance(LiftoverVcf.class);
    /** the Variant sorter, may be null if DISABLE_SORT = true */
    private SortingCollection sorter;

    private long failedLiftover = 0, failedAlleleCheck = 0, totalTrackedAsSwapRefAlt = 0;
    private final Map rejectsByContig = new TreeMap<>();
    private final Map liftedByDestContig = new TreeMap<>();
    private final Map liftedBySourceContig = new TreeMap<>();

    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new ReferenceArgumentCollection() {
            @Argument(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common=false,
                    doc = "The reference sequence (fasta) for the TARGET genome build (i.e., the new one.  The fasta file must have an " +
                                "accompanying sequence dictionary (.dict file).")
            public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;

            @Override
            public File getReferenceFile() {
                return REFERENCE_SEQUENCE;
            }
        };
    }

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        IOUtil.assertFileIsReadable(CHAIN);
        IOUtil.assertFileIsWritable(OUTPUT);
        IOUtil.assertFileIsWritable(REJECT);

        
        if (CREATE_INDEX && DISABLE_SORT) {
            log.error("CREATE_INDEX=true and DISABLE_SORT=true are mutually exclusive.");
            return 1;
        }
        ////////////////////////////////////////////////////////////////////////
        // Setup the inputs
        ////////////////////////////////////////////////////////////////////////
        final LiftOver liftOver = new LiftOver(CHAIN);
        liftOver.setShouldLogFailedIntervalsBelowThreshold(LOG_FAILED_INTERVALS);

        final VCFFileReader in = new VCFFileReader(INPUT, false);

        log.info("Loading up the target reference genome.");
        final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        final Map refSeqs = new HashMap<>();
        // check if sequence dictionary exists
        if (walker.getSequenceDictionary() == null) {
            log.error("Reference " + REFERENCE_SEQUENCE.getAbsolutePath() + " must have an associated Dictionary .dict file in the same directory.");
            return 1;
        }
        for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
            refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()));
        }
        CloserUtil.close(walker);

        ////////////////////////////////////////////////////////////////////////
        // Setup the outputs
        ////////////////////////////////////////////////////////////////////////
        final VCFHeader inHeader = in.getFileHeader();
        // build VCF header, remove the old '##reference=file://...'
        final VCFHeader outHeader = new VCFHeader(
            inHeader.getMetaDataInInputOrder()
                .stream()
                .filter(M->!M.getKey().equals("reference"))
                .collect(Collectors.toCollection(LinkedHashSet::new)),
            inHeader.getSampleNamesInOrder()
            );
        outHeader.setSequenceDictionary(walker.getSequenceDictionary());
        if (WRITE_ORIGINAL_POSITION) {
            for (final VCFInfoHeaderLine line : ATTRS) outHeader.addMetaDataLine(line);
        }

        outHeader.addMetaDataLine(new VCFInfoHeaderLine(LiftoverUtils.SWAPPED_ALLELES, 0, VCFHeaderLineType.Flag,
                "The REF and the ALT alleles have been swapped in liftover due to changes in the reference. " +
                "It is possible that not all INFO annotations reflect this swap, and in the genotypes, " +
                "only the GT, PL, and AD fields have been modified. You should check the TAGS_TO_REVERSE parameter that was used " +
                        "during the LiftOver to be sure."));
        outHeader.addMetaDataLine(new VCFInfoHeaderLine(LiftoverUtils.REV_COMPED_ALLELES, 0, VCFHeaderLineType.Flag,
                "The REF and the ALT alleles have been reverse complemented in liftover since the mapping from the " +
                        "previous reference to the current one was on the negative strand."));
        outHeader.addMetaDataLine(new VCFHeaderLine("reference", REFERENCE_SEQUENCE.toURI().toString()));
        
        this.acceptedRecords = new VariantContextWriterBuilder()
            .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
            .modifyOption(Options.INDEX_ON_THE_FLY,!DISABLE_SORT)
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(walker.getSequenceDictionary())
            .build();
        
        this.acceptedRecords.writeHeader(outHeader);

        rejectedRecords = new VariantContextWriterBuilder().setOutputFile(REJECT)
                .unsetOption(Options.INDEX_ON_THE_FLY)
                .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
                .build();
        final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
        for (final VCFFilterHeaderLine line : FILTERS) {
            rejectHeader.addMetaDataLine(line);
        }

        rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS, 1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));
        rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_ALLELES, 1, VCFHeaderLineType.String, "The alleles of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));

        rejectedRecords.writeHeader(rejectHeader);

        ////////////////////////////////////////////////////////////////////////
        // Read the input VCF, lift the records over and write to the sorting
        // collection.
        ////////////////////////////////////////////////////////////////////////
        long total = 0;
        
        if (DISABLE_SORT) {
            log.info("Lifting variants over and writing the output file. Variants will not be sorted.");
            
            sorter = null;
            }
        else {
            log.info("Lifting variants over and sorting (not yet writing the output file.)");
    
            sorter = SortingCollection.newInstance(VariantContext.class,
                    new VCFRecordCodec(outHeader, ALLOW_MISSING_FIELDS_IN_HEADER || VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                    outHeader.getVCFRecordComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
            }

        ProgressLogger progress = new ProgressLogger(log, 1000000, "read");

        for (final VariantContext ctx : in) {
            ++total;
            final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
            final Interval target = liftOver.liftOver(source, LIFTOVER_MIN_MATCH);

            // target is null when there is no good liftover for the context. This happens either when it fall in a gap
            // where there isn't a chain, or if a large enough proportion of it is diminished by the "deletion" at the
            // end of each interval in a chain.
            if (target == null) {
                rejectVariant(ctx, FILTER_NO_TARGET);
                continue;
            }

            // the target is the lifted-over interval comprised of the start/stop of the variant context,
            // if the sizes of target and ctx do not match, it means that the interval grew or shrank during
            // liftover which must be due to straddling multiple intervals in the liftover chain.
            // This would invalidate the indel as it isn't clear what the resulting alleles should be.
            if (ctx.getReference().length() != target.length()) {
                rejectVariant(ctx, FILTER_INDEL_STRADDLES_TWO_INTERVALS);
                continue;
            }

            final ReferenceSequence refSeq;

            if (!refSeqs.containsKey(target.getContig())) {
                rejectVariant(ctx, FILTER_NO_TARGET);

                final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
                if (WARN_ON_MISSING_CONTIG) {
                    log.warn(missingContigMessage);
                } else {
                    log.error(missingContigMessage);
                    return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE;
                }
            } else {
                refSeq = refSeqs.get(target.getContig());

                final VariantContext liftedVC = LiftoverUtils.liftVariant(ctx, target, refSeq, WRITE_ORIGINAL_POSITION, WRITE_ORIGINAL_ALLELES);
                // the liftedVC can be null if the liftover fails because of a problem with reverse complementing
                if (liftedVC == null) {
                    rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_REV_COMP);
                } else {
                    tryToAddVariant(liftedVC, refSeq, ctx);
                }
            }
            progress.record(ctx.getContig(), ctx.getStart());
        }

        final NumberFormat pfmt = new DecimalFormat("0.0000%");
        final String pct = pfmt.format((failedLiftover + failedAlleleCheck) / (double) total);
        log.info("Processed ", total, " variants.");
        log.info(failedLiftover, " variants failed to liftover.");
        log.info(failedAlleleCheck, " variants lifted over but had mismatching reference alleles after lift over.");
        log.info(pct, " of variants were not successfully lifted over and written to the output.");

        final Set contigUnion = new TreeSet<>();
        contigUnion.addAll(liftedBySourceContig.keySet());
        contigUnion.addAll(rejectsByContig.keySet());

        log.info("liftover success by source contig:");
        for (String contig : contigUnion) {
            final long success = liftedBySourceContig.getOrDefault(contig, 0L);
            final long fail = rejectsByContig.getOrDefault(contig, 0L);
            final String liftPct = pfmt.format((double)success / (double)(success + fail));

            log.info(contig, ": ", success, " / ", (success + fail), " (", liftPct, ")");
        }

        log.info("lifted variants by target contig:");
        for (String contig : liftedByDestContig.keySet()) {
            log.info(contig, ": ", liftedByDestContig.get(contig));
        }
        if (liftedByDestContig.isEmpty()) {
            log.info("no successfully lifted variants");
        }

        if (RECOVER_SWAPPED_REF_ALT) {
            log.info(totalTrackedAsSwapRefAlt, " variants were lifted by swapping REF/ALT alleles.");
        } else {
            log.warn(totalTrackedAsSwapRefAlt, " variants with a swapped REF/ALT were identified, but were not recovered.  See RECOVER_SWAPPED_REF_ALT and associated caveats.");
        }

        rejectedRecords.close();
        in.close();

        if (!DISABLE_SORT) { 
            ////////////////////////////////////////////////////////////////////////
            // Write the sorted outputs to the final output file
            ////////////////////////////////////////////////////////////////////////
            sorter.doneAdding();
            progress = new ProgressLogger(log, 1000000, "written");
            log.info("Writing out sorted records to final VCF.");
    
            for (final VariantContext ctx : sorter) {
                this.acceptedRecords.add(ctx);
                progress.record(ctx.getContig(), ctx.getStart());
            }
    
            sorter.cleanup();
        }

        this.acceptedRecords.close();
        
        return 0;
    }

    private void rejectVariant(final VariantContext ctx, final String reason) {
        rejectedRecords.add(new VariantContextBuilder(ctx).filter(reason).make());
        failedLiftover++;
        trackLiftedVariantContig(rejectsByContig, ctx.getContig());
    }

    private void trackLiftedVariantContig(final Map map, final String contig) {
        Long val = map.get(contig);
        if (val == null) {
            val = 0L;
        }

        map.put(contig, ++val);
    }

    private void addAndTrack(final VariantContext toAdd, final VariantContext source) {
        trackLiftedVariantContig(liftedBySourceContig, source.getContig());
        trackLiftedVariantContig(liftedByDestContig, toAdd.getContig());
        if (!DISABLE_SORT) { //we're sorting the variants
            sorter.add(toAdd);
        } else {
            this.acceptedRecords.add(toAdd);
        }
    }

    /**
     *  utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed)
     *
     * @param vc new {@link VariantContext}
     * @param refSeq {@link ReferenceSequence} of new reference
     * @param source the original {@link VariantContext} to use for putting the original location information into vc
     */
    private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
        if (!refSeq.getName().equals(vc.getContig())) {
            throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
        }

        // Check that the reference allele still agrees with the reference sequence
        boolean mismatchesReference = false;
        for (final Allele allele : vc.getAlleles()) {
            if (allele.isReference()) {
                final byte[] ref = refSeq.getBases();
                final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);

                if (!refString.equalsIgnoreCase(allele.getBaseString())) {
                    // consider that the ref and the alt may have been swapped in a simple biallelic SNP
                    if (vc.isBiallelic() && vc.isSNP() && refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
                        if (RECOVER_SWAPPED_REF_ALT) {
                            totalTrackedAsSwapRefAlt++;
                            addAndTrack(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP), source);
                            return;
                        } else {
                            totalTrackedAsSwapRefAlt++;
                        }
                    }
                    mismatchesReference = true;
                }
                break;
            }
        }

        if (mismatchesReference) {
            rejectedRecords.add(new VariantContextBuilder(source)
                    .filter(FILTER_MISMATCHING_REF_ALLELE)
                    .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                    .attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
                    .make());
            failedAlleleCheck++;
            trackLiftedVariantContig(rejectsByContig, source.getContig());
        } else {
            addAndTrack(vc, source);
        }
    }
}