All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.variant.bcf2.BCF2Utils Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.bcf2;

import htsjdk.samtools.util.FileExtensions;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.vcf.*;

import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * Common utilities for working with BCF2 files
 *
 * Includes convenience methods for encoding, decoding BCF2 type descriptors (size + type)
 *
 * @author depristo
 * @since 5/12
 */
public final class BCF2Utils {
    /**
     * Not referenced by the code in this class; presumably the largest number of alleles
     * a genotype may refer to (TODO confirm against the callers).
     */
    public static final int MAX_ALLELES_IN_GENOTYPES = 127;

    /** Size-field value that {@link #sizeIsOverflow(byte)} recognises as the overflow marker. */
    public static final int OVERFLOW_ELEMENT_MARKER = 15;
    /**
     * One less than {@link #OVERFLOW_ELEMENT_MARKER}; presumably the largest element count
     * that is stored directly in a type descriptor (TODO confirm against the callers).
     */
    public static final int MAX_INLINE_ELEMENTS = 14;

    /** The integer types tried, in this order, by {@link #determineIntegerType(int)}. */
    public static final BCF2Type[] INTEGER_TYPES_BY_SIZE = {BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32};
    /** Table indexed by {@code BCF2Type.getID()} that yields the type owning that id. */
    public static final BCF2Type[] ID_TO_ENUM;

    static {
        // First pass: find the largest id so the table can be sized to hold every type.
        int largestId = -1;
        for ( final BCF2Type type : BCF2Type.values() ) {
            largestId = Math.max(type.getID(), largestId);
        }

        // Second pass: slot each type at the index given by its id.
        ID_TO_ENUM = new BCF2Type[largestId + 1];
        for ( final BCF2Type type : BCF2Type.values() ) {
            ID_TO_ENUM[type.getID()] = type;
        }
    }

    /** Private so that this class, which only has static members, cannot be instantiated. */
    private BCF2Utils() {}

    /**
     * Create a strings dictionary from the VCF header.
     *
     * The dictionary is an ordered list of the IDs of every header line that reports
     * {@code shouldBeAddedToDictionary()}, preceded by the special PASS filter value.
     *
     * Note that it's critical that the list be de-duplicated and ordered in a consistent manner
     * each time, as the BCF2 offsets are encoded relative to this dictionary, and if it isn't
     * determined exactly the same way as in the header each time it's very bad.
     *
     * @param header the VCFHeader from which to build the dictionary
     * @return a non-null dictionary whose first element is always the PASS filter value;
     *         every ID appears once, at the position of its first occurrence in the header
     */
    public static ArrayList<String> makeDictionary(final VCFHeader header) {
        final Set<String> seen = new HashSet<>();
        final ArrayList<String> dict = new ArrayList<>();

        // special case the special PASS field which doesn't show up in the FILTER field definitions
        seen.add(VCFConstants.PASSES_FILTERS_v4);
        dict.add(VCFConstants.PASSES_FILTERS_v4);

        // set up the strings dictionary, keeping only the first occurrence of each ID
        for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
            if ( line.shouldBeAddedToDictionary() ) {
                final String id = ((VCFIDHeaderLine) line).getID();
                // Set.add returns false when the ID was already present
                if ( seen.add(id) ) {
                    dict.add(id);
                }
            }
        }

        return dict;
    }

    /**
     * Packs an element count and a type into a single type-descriptor byte.
     *
     * The low four bits of {@code nElements} go into the upper half of the byte and the
     * low four bits of the type's id go into the lower half; any higher bits of either
     * value are discarded.
     *
     * @param nElements the element count to encode
     * @param type the type whose id is encoded
     * @return the packed descriptor byte
     */
    public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
        final int sizeBits = (nElements & 0x0F) << 4;
        final int typeBits = type.getID() & 0x0F;
        return (byte) (sizeBits | typeBits);
    }

    /**
     * Extracts the size field, i.e. the upper four bits, of a type descriptor.
     *
     * @param typeDescriptor the descriptor byte
     * @return the size field as a value between 0 and 15
     */
    public static int decodeSize(final byte typeDescriptor) {
        // the mask after the shift discards the sign-extension bits of negative bytes
        final int upperNibble = (typeDescriptor >> 4) & 0x0F;
        return upperNibble;
    }

    /**
     * Extracts the type id, i.e. the lower four bits, of a type descriptor.
     *
     * @param typeDescriptor the descriptor byte
     * @return the type id as a value between 0 and 15
     */
    public static int decodeTypeID(final byte typeDescriptor) {
        final int lowerNibble = 0x0F & typeDescriptor;
        return lowerNibble;
    }

    /**
     * Looks up the type named by the lower four bits of a type descriptor.
     *
     * @param typeDescriptor the descriptor byte
     * @return the entry of {@link #ID_TO_ENUM} at the decoded type id
     */
    public static BCF2Type decodeType(final byte typeDescriptor) {
        final int typeId = decodeTypeID(typeDescriptor);
        return ID_TO_ENUM[typeId];
    }

    /**
     * Tells whether the size field of a type descriptor holds the overflow marker.
     *
     * @param typeDescriptor the descriptor byte
     * @return true if the decoded size equals {@link #OVERFLOW_ELEMENT_MARKER}
     */
    public static boolean sizeIsOverflow(final byte typeDescriptor) {
        final int encodedSize = decodeSize(typeDescriptor);
        return OVERFLOW_ELEMENT_MARKER == encodedSize;
    }

    /**
     * Reads exactly one byte from {@code stream}.
     *
     * {@link InputStream#read()} signals end of stream with {@code -1}. Masking and casting
     * that value would produce the byte {@code 0xFF}, which cannot be told apart from real
     * data, so end of stream is reported with an exception instead.
     *
     * @param stream the stream to read from
     * @return the next byte of the stream
     * @throws EOFException if the stream is already at its end
     * @throws IOException if the underlying read fails
     */
    public static byte readByte(final InputStream stream) throws IOException {
        final int value = stream.read();
        if ( value < 0 ) {
            throw new EOFException("Unexpected end of stream while reading a byte");
        }
        return (byte) value;
    }

    /**
     * Collapse multiple strings into a comma separated list with a leading comma.
     *
     * ["s1", "s2", "s3"] => ",s1,s2,s3"
     *
     * An empty list collapses to the empty string and a single-element list collapses to
     * that element unchanged (no leading comma). For longer lists, null elements are skipped.
     *
     * @param strings the strings to collapse; when there is more than one, none may contain
     *                a comma (only checked when assertions are enabled)
     * @return the collapsed string as described above
     */
    public static String collapseStringList(final List<String> strings) {
        if ( strings.isEmpty() ) {
            return "";
        }
        if ( strings.size() == 1 ) {
            return strings.get(0);
        }

        final StringBuilder b = new StringBuilder();
        for ( final String s : strings ) {
            if ( s != null ) {
                assert s.indexOf(',') == -1; // no commas in individual strings
                b.append(',').append(s);
            }
        }
        return b.toString();
    }

    /**
     * Inverse operation of collapseStringList.
     *
     * ",s1,s2,s3" => ["s1", "s2", "s3"]
     *
     * The leading comma is dropped and the remainder is split on commas. Because
     * {@link String#split(String)} is used, trailing empty strings are not part of the result.
     *
     * @param collapsed a string for which {@link #isCollapsedString(String)} is true
     *                  (only checked when assertions are enabled)
     * @return a fixed-size list of the individual strings
     */
    public static List<String> explodeStringList(final String collapsed) {
        assert isCollapsedString(collapsed);
        final String[] exploded = collapsed.substring(1).split(",");
        return Arrays.asList(exploded);
    }

    /**
     * Tells whether a string starts with a comma.
     *
     * @param s the string to inspect, must not be null
     * @return true if {@code s} is non-empty and its first character is a comma
     */
    public static boolean isCollapsedString(final String s) {
        return s.startsWith(",");
    }

    /**
     * Returns a good name for a shadow BCF file for vcfFile.
     *
     * foo.vcf => foo.bcf
     * foo.xxx => foo.xxx.bcf
     *
     * Only the file name is rewritten; a VCF extension that happens to occur in a
     * parent directory name is left alone.
     *
     * If the file name contains no VCF extension and the resulting BCF file neither exists
     * readably nor can be written, return null.  Happens when vcfFile = /dev/null for example.
     * Checking writability briefly creates and then deletes the candidate file.
     *
     * @param vcfFile the VCF file to derive the shadow name from
     * @return the BCF file, or null if it could not be written
     */
    public static final File shadowBCF(final File vcfFile) {
        final File absolute = vcfFile.getAbsoluteFile();
        final String name = absolute.getName();

        // swap the extension in the file name only, never in the directory part of the path
        if ( name.contains(FileExtensions.VCF) ) {
            return new File(absolute.getParentFile(), name.replace(FileExtensions.VCF, FileExtensions.BCF));
        }

        final File bcf = new File(absolute.getPath() + FileExtensions.BCF);
        if ( bcf.canRead() ) {
            return bcf;
        }

        try {
            // this is the only way to robustly decide if we could actually write to BCF
            new FileOutputStream(bcf).close();
        } catch ( final IOException e ) {
            // covers FileNotFoundException too, which is a subclass of IOException
            return null;
        }

        // remove the probe file; the result of delete() is deliberately not inspected
        bcf.delete();
        return bcf;
    }

    /**
     * Finds the first type of {@link #INTEGER_TYPES_BY_SIZE} that can hold a value.
     *
     * @param value the integer to encode
     * @return the first type in {@link #INTEGER_TYPES_BY_SIZE} whose {@code withinRange} accepts {@code value}
     * @throws TribbleException if none of the types accepts {@code value}
     */
    public static BCF2Type determineIntegerType(final int value) {
        for ( int i = 0; i < INTEGER_TYPES_BY_SIZE.length; i++ ) {
            final BCF2Type candidate = INTEGER_TYPES_BY_SIZE[i];
            if ( candidate.withinRange(value) ) {
                return candidate;
            }
        }

        throw new TribbleException("Integer cannot be encoded in allowable range of even INT32: " + value);
    }

    /**
     * Finds the integer type needed to hold every value of an array.
     *
     * The extremes of the array are determined starting from 0 for both the minimum and the
     * maximum, each extreme is mapped to a type, and the one that compares greater is returned.
     *
     * @param values the integers to encode, may be empty
     * @return the greater, by enum order, of the types required by the smallest and largest value
     */
    public static BCF2Type determineIntegerType(final int[] values) {
        int largest = 0;
        int smallest = 0;
        for ( int i = 0; i < values.length; i++ ) {
            largest = Math.max(largest, values[i]);
            smallest = Math.min(smallest, values[i]);
        }

        final BCF2Type typeForLargest = determineIntegerType(largest);
        final BCF2Type typeForSmallest = determineIntegerType(smallest);

        // INT8 < INT16 < INT32 so this returns the larger of the two
        if ( typeForLargest.compareTo(typeForSmallest) >= 0 ) {
            return typeForLargest;
        }
        return typeForSmallest;
    }

    /**
     * Returns the maximum BCF2 integer size of t1 and t2
     *
     * For example, if t1 == INT8 and t2 == INT16 returns INT16
     *
     * Only t1 is validated: when t1 is INT8 the value of t2 is returned as is.
     *
     * @param t1 the first type; must be INT8, INT16 or INT32
     * @param t2 the second type
     * @return the larger of the two types
     * @throws TribbleException if t1 is not one of INT8, INT16 or INT32
     */
    public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
        final BCF2Type larger;
        switch ( t1 ) {
            case INT8:
                // nothing is smaller than INT8, so t2 decides
                larger = t2;
                break;
            case INT16:
                larger = (t2 == BCF2Type.INT32) ? t2 : t1;
                break;
            case INT32:
                // nothing is larger than INT32
                larger = t1;
                break;
            default:
                throw new TribbleException("BUG: unexpected BCF2Type " + t1);
        }
        return larger;
    }

    /**
     * Finds the integer type needed to hold every value of a list.
     *
     * @param values the integers to encode; may be empty but must not contain null
     * @return INT8 for an empty list, otherwise the largest type required by any element
     * @throws TribbleException if an element maps to a type other than INT8, INT16 or INT32
     */
    public static BCF2Type determineIntegerType(final List<Integer> values) {
        BCF2Type maxType = BCF2Type.INT8;
        for ( final int value : values ) {
            final BCF2Type type1 = determineIntegerType(value);
            switch ( type1 ) {
                case INT8: break;
                case INT16: maxType = BCF2Type.INT16; break;
                case INT32: return BCF2Type.INT32; // fast path for largest possible value
                default: throw new TribbleException("Unexpected integer type " + type1 );
            }
        }
        return maxType;
    }

    /**
     * Helper function that takes an object and returns a list representation
     * of it:
     *
     * o == null => []
     * o is a list => o
     * else => [o]
     *
     * @param c  the class of the object
     * @param o  the object to convert to a Java List
     * @return
     */
    public static  List toList(final Class c, final Object o) {
        if ( o == null ) return Collections.emptyList();
        else if ( o instanceof List ) return (List)o;
        else if ( o.getClass().isArray() ) {
            final int arraySize = Array.getLength(o);
            final List list = new ArrayList(arraySize);
            for (int i=0; i outputLinesIt = outputHeader.getIDHeaderLines().iterator();
        final Iterator inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();

        while ( inputLinesIt.hasNext() ) {
            if ( ! outputLinesIt.hasNext() ) // missing lines in output
                return false;

            final VCFIDHeaderLine outputLine = outputLinesIt.next();
            final VCFIDHeaderLine inputLine = inputLinesIt.next();

            if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) )
                return false;
        }

        return true;
    }

    /**
     * Substitutes an empty list for {@code null}.
     *
     * @param l a list, possibly null
     * @return {@code l} itself when it is non-null, otherwise {@link Collections#emptyList()}
     */
    private static <T> List<T> nullAsEmpty(final List<T> l) {
        return l == null ? Collections.<T>emptyList() : l;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy