// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it as often as you want.
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.bcf2;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.vcf.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
* Common utilities for working with BCF2 files
*
* Includes convenience methods for encoding, decoding BCF2 type descriptors (size + type)
*
* @author depristo
* @since 5/12
*/
public final class BCF2Utils {
public static final int MAX_ALLELES_IN_GENOTYPES = 127;

/** Size-nibble value that {@link #sizeIsOverflow(byte)} compares the decoded size against. */
public static final int OVERFLOW_ELEMENT_MARKER = 15;
public static final int MAX_INLINE_ELEMENTS = 14;

/** The three integer types, listed INT8, INT16, INT32. */
public static final BCF2Type[] INTEGER_TYPES_BY_SIZE = new BCF2Type[]{BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32};

/** Lookup table indexed by {@code BCF2Type.getID()}; slots whose ID no constant uses stay {@code null}. */
public static final BCF2Type[] ID_TO_ENUM;

static {
    final BCF2Type[] allTypes = BCF2Type.values();

    // first pass: find the largest ID so the table can be sized
    int largestId = -1;
    for ( final BCF2Type t : allTypes ) {
        final int id = t.getID();
        if ( id > largestId ) {
            largestId = id;
        }
    }

    // second pass: place every constant at the index given by its ID
    final BCF2Type[] byId = new BCF2Type[largestId + 1];
    for ( final BCF2Type t : allTypes ) {
        byId[t.getID()] = t;
    }
    ID_TO_ENUM = byId;
}

/** Static helpers only; never instantiated. */
private BCF2Utils() {}
/**
 * Create a strings dictionary from the VCF header.
 *
 * The dictionary is an ordered, duplicate-free list of the IDs of every header line
 * that reports {@code shouldBeAddedToDictionary()}. The first entry is always
 * {@code VCFConstants.PASSES_FILTERS_v4}; the remaining IDs follow in the order in
 * which {@code header.getMetaDataInInputOrder()} yields them, first occurrence winning.
 *
 * Note that it's critical that the list be de-duplicated and ordered in the same way each
 * time, as the BCF2 offsets are encoded relative to this dictionary.
 *
 * @param header the VCFHeader from which to build the dictionary
 * @return a non-null dictionary of elements; always contains at least the PASS entry
 */
public static ArrayList<String> makeDictionary(final VCFHeader header) {
    final Set<String> seen = new HashSet<String>();
    final ArrayList<String> dict = new ArrayList<String>();

    // special case the special PASS field which doesn't show up in the FILTER field definitions
    seen.add(VCFConstants.PASSES_FILTERS_v4);
    dict.add(VCFConstants.PASSES_FILTERS_v4);

    // set up the strings dictionary
    for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
        if ( line.shouldBeAddedToDictionary() ) {
            final String id = ((VCFIDHeaderLine) line).getID();
            // Set.add returns false for an ID already seen, so duplicates are skipped
            if ( seen.add(id) ) {
                dict.add(id);
            }
        }
    }
    return dict;
}
/**
 * Packs an element count and a type into a single type-descriptor byte.
 *
 * The low four bits of {@code nElements} go into the high nibble and the low four
 * bits of {@code type.getID()} go into the low nibble.
 *
 * @param nElements element count; only its low four bits are kept
 * @param type the type whose ID is stored in the low nibble
 * @return the packed descriptor byte
 */
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
    final int sizeNibble = (nElements & 0x0F) << 4;
    final int typeNibble = type.getID() & 0x0F;
    return (byte) (sizeNibble | typeNibble);
}
/**
 * Extracts the size field (high nibble) from a type-descriptor byte.
 *
 * @param typeDescriptor the packed descriptor byte
 * @return the high nibble as a value in the range 0..15
 */
public static int decodeSize(final byte typeDescriptor) {
    // widen without sign extension, then shift the high nibble down
    final int unsigned = typeDescriptor & 0xFF;
    return unsigned >>> 4;
}
/**
 * Extracts the type ID (low nibble) from a type-descriptor byte.
 *
 * @param typeDescriptor the packed descriptor byte
 * @return the low nibble as a value in the range 0..15
 */
public static int decodeTypeID(final byte typeDescriptor) {
    final int lowNibbleMask = 0x0F;
    return lowNibbleMask & typeDescriptor;
}
/**
 * Looks up the type stored in the low nibble of a type-descriptor byte.
 *
 * @param typeDescriptor the packed descriptor byte
 * @return the {@code ID_TO_ENUM} entry at the decoded type ID
 */
public static BCF2Type decodeType(final byte typeDescriptor) {
    final int typeId = decodeTypeID(typeDescriptor);
    return ID_TO_ENUM[typeId];
}
/**
 * Tells whether the size field of a type-descriptor byte holds the overflow marker.
 *
 * @param typeDescriptor the packed descriptor byte
 * @return true when the decoded size equals {@code OVERFLOW_ELEMENT_MARKER}
 */
public static boolean sizeIsOverflow(final byte typeDescriptor) {
    final int encodedSize = decodeSize(typeDescriptor);
    return OVERFLOW_ELEMENT_MARKER == encodedSize;
}
/**
 * Reads exactly one byte from the stream.
 *
 * {@code InputStream.read()} signals end of stream with -1; masking that value would
 * make it indistinguishable from a genuine 0xFF byte, so end of stream is reported
 * as an exception instead.
 *
 * @param stream the stream to read from
 * @return the byte that was read
 * @throws java.io.EOFException if the stream is already at its end
 * @throws IOException if the underlying read fails
 */
public static byte readByte(final InputStream stream) throws IOException {
    final int value = stream.read();
    if ( value < 0 ) {
        throw new java.io.EOFException("Unexpected end of stream while reading a byte");
    }
    return (byte) value;
}
/**
 * Collapse multiple strings into a comma separated list
 *
 * ["s1", "s2", "s3"] => ",s1,s2,s3"
 *
 * Special cases: an empty list yields "", and a single-element list yields that
 * element unchanged (no leading comma). In lists of two or more elements, null
 * entries are skipped.
 *
 * @param strings the strings to collapse; individual strings must not contain commas
 * @return the collapsed representation as described above
 */
public static String collapseStringList(final List<String> strings) {
    if ( strings.isEmpty() ) {
        return "";
    }
    if ( strings.size() == 1 ) {
        return strings.get(0);
    }

    final StringBuilder b = new StringBuilder();
    for ( final String s : strings ) {
        if ( s != null ) {
            assert s.indexOf(',') == -1; // no commas in individual strings
            b.append(',').append(s);
        }
    }
    return b.toString();
}
/**
 * Inverse operation of collapseStringList.
 *
 * ",s1,s2,s3" => ["s1", "s2", "s3"]
 *
 * The leading comma is dropped and the remainder is split on commas with
 * {@code String.split}, so trailing empty strings are not included. The result is
 * the fixed-size list returned by {@code Arrays.asList}.
 *
 * @param collapsed a string for which {@code isCollapsedString} is true
 * @return the individual strings, in order
 */
public static List<String> explodeStringList(final String collapsed) {
    assert isCollapsedString(collapsed);
    final String withoutLeadingComma = collapsed.substring(1);
    return Arrays.asList(withoutLeadingComma.split(","));
}
/**
 * Tells whether a string has the leading comma that collapseStringList puts on
 * multi-element output.
 *
 * @param s the string to inspect; must not be null
 * @return true when {@code s} is non-empty and its first character is a comma
 */
public static boolean isCollapsedString(final String s) {
    return s.startsWith(",");
}
/**
 * Returns a good name for a shadow BCF file for vcfFile.
 *
 * foo.vcf => foo.bcf
 * foo.xxx => foo.xxx.bcf
 *
 * Only the file name is rewritten; directory components of the path are left
 * untouched even if they happen to contain the VCF extension.
 *
 * When the BCF extension has to be appended and the resulting file is not already
 * readable, writability is probed by briefly creating and then deleting the file.
 * If that probe fails, return null. Happens when vcfFile = /dev/null for example
 *
 * @param vcfFile the VCF file to derive the shadow BCF location from
 * @return the BCF file, or null if the appended-extension file cannot be created
 */
public static final File shadowBCF(final File vcfFile) {
    final File absolute = vcfFile.getAbsoluteFile();
    final String name = absolute.getName();

    if ( name.contains(FileExtensions.VCF) ) {
        // swap the extension in the file name only, never in a parent directory
        final String bcfName = name.replace(FileExtensions.VCF, FileExtensions.BCF);
        return new File(absolute.getParentFile(), bcfName);
    }

    final File bcf = new File( absolute.getPath() + FileExtensions.BCF );
    if ( bcf.canRead() ) {
        return bcf;
    }

    try {
        // this is the only way to robustly decide if we could actually write to BCF
        final FileOutputStream o = new FileOutputStream(bcf);
        o.close();
        bcf.delete();
        return bcf;
    } catch ( IOException e ) {
        // covers FileNotFoundException too: the location is not writable
        return null;
    }
}
/**
 * Picks the first entry of {@code INTEGER_TYPES_BY_SIZE} whose range contains the value.
 *
 * @param value the integer to encode
 * @return the first listed type for which {@code withinRange(value)} is true
 * @throws TribbleException if no listed type accepts the value
 */
public static BCF2Type determineIntegerType(final int value) {
    for ( int i = 0; i < INTEGER_TYPES_BY_SIZE.length; i++ ) {
        final BCF2Type candidate = INTEGER_TYPES_BY_SIZE[i];
        if ( candidate.withinRange(value) ) {
            return candidate;
        }
    }
    throw new TribbleException("Integer cannot be encoded in allowable range of even INT32: " + value);
}
/**
 * Picks an integer type able to hold every value of the array.
 *
 * The extremes are tracked starting from zero, so an empty array is treated like
 * an array containing only 0.
 *
 * @param values the integers to encode
 * @return the larger of the types needed for the maximum and for the minimum
 */
public static BCF2Type determineIntegerType(final int[] values) {
    int smallest = 0;
    int largest = 0;
    for ( int i = 0; i < values.length; i++ ) {
        largest = Math.max(largest, values[i]);
        smallest = Math.min(smallest, values[i]);
    }

    final BCF2Type typeForLargest = determineIntegerType(largest);
    final BCF2Type typeForSmallest = determineIntegerType(smallest);

    // INT8 < INT16 < INT32 in enum order, so the greater constant is the wider type
    if ( typeForLargest.compareTo(typeForSmallest) < 0 ) {
        return typeForSmallest;
    }
    return typeForLargest;
}
/**
 * Returns the wider of two BCF2 integer types.
 *
 * For example, if t1 == INT8 and t2 == INT16 returns INT16
 *
 * @param t1 first type; must be INT8, INT16 or INT32
 * @param t2 second type
 * @return t1 or t2, whichever is the wider integer type
 * @throws TribbleException if t1 is not one of the three integer types
 */
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
    switch ( t1 ) {
        case INT32:
            // nothing is wider than INT32
            return t1;
        case INT16:
            // only INT32 beats INT16
            if ( t2 == BCF2Type.INT32 ) {
                return t2;
            }
            return t1;
        case INT8:
            // t2 is returned unchanged
            return t2;
        default:
            throw new TribbleException("BUG: unexpected BCF2Type " + t1);
    }
}
/**
 * Picks an integer type able to hold every value of the list.
 *
 * An empty list yields INT8. Iteration stops as soon as a value needs INT32,
 * since no wider type exists.
 *
 * @param values the integers to encode; elements must not be null
 * @return INT8, INT16 or INT32, whichever is the narrowest type that fits all values
 * @throws TribbleException if a value maps to a type other than the three integer types
 */
public static BCF2Type determineIntegerType(final List<Integer> values) {
    BCF2Type maxType = BCF2Type.INT8;
    for ( final int value : values ) {
        final BCF2Type type1 = determineIntegerType(value);
        switch ( type1 ) {
            case INT8:
                break;
            case INT16:
                maxType = BCF2Type.INT16;
                break;
            case INT32:
                return BCF2Type.INT32; // fast path for largest possible value
            default:
                throw new TribbleException("Unexpected integer type " + type1 );
        }
    }
    return maxType;
}
/**
* Helper function that takes an object and returns a list representation
* of it:
*
* o == null => []
* o is a list => o
* else => [o]
*
* @param c the class of the object
* @param o the object to convert to a Java List
* @return
*/
// NOTE(review): this span looks damaged by text extraction. The toList body below
// stops in the middle of its for-loop header, and the rest of the span is the tail of
// a different method that uses outputHeader and genotypesBlockHeader, whose signature
// is not visible here. Restore both from the upstream source before editing.
public static List toList(final Class c, final Object o) {
// null input maps to the empty list
if ( o == null ) return Collections.emptyList();
// a List is handed back as-is (cast only, no copy)
else if ( o instanceof List ) return (List)o;
else if ( o.getClass().isArray() ) {
// reflective length lookup, so any array type is accepted
final int arraySize = Array.getLength(o);
final List list = new ArrayList(arraySize);
// NOTE(review): the loop header is cut off after "i"; what follows on this line is the
// declaration of outputLinesIt, which belongs to the other method.
for (int i=0; i outputLinesIt = outputHeader.getIDHeaderLines().iterator();
// NOTE(review): the generic type here has lost its opening "<" and wildcard — TODO confirm.
final Iterator extends VCFIDHeaderLine> inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();
// walk both iterators in lockstep, driven by the input header's lines
while ( inputLinesIt.hasNext() ) {
if ( ! outputLinesIt.hasNext() ) // missing lines in output
return false;
final VCFIDHeaderLine outputLine = outputLinesIt.next();
final VCFIDHeaderLine inputLine = inputLinesIt.next();
// lines at the same position must have the same concrete class and the same ID
if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) )
return false;
}
// every input line matched; output lines left over after the input ends are not examined
return true;
}
/**
 * Substitutes an empty list for a null one.
 *
 * @param l the list, possibly null
 * @return {@code l} itself when non-null, otherwise the immutable empty list
 */
private static <T> List<T> nullAsEmpty(final List<T> l) {
    if ( l == null ) {
        return Collections.emptyList();
    }
    return l;
}
}