// ![JAR search and dependency download from the Maven repository](/logo.png)
// htsjdk.samtools.BinaryTagCodec Maven / Gradle / Ivy
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.StringUtil;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
 * Converter between disk and in-memory representation of a SAMRecord tag.
 *
 * <p>Writing goes through the {@link BinaryCodec} supplied to the constructor; reading is done by the
 * static {@link #readTags} method directly from a byte array and does not use a BinaryCodec.
 */
public class BinaryTagCodec {
    // Size in bytes of the fixed part of the disk representation of a tag,
    // i.e. the number of bytes occupied by the tag name and tag type fields.
    private static final int FIXED_TAG_SIZE = 3;

    // Size in bytes of the fixed part of the value of a binary array,
    // i.e. the number of bytes occupied by the array type and the array length.
    private static final int FIXED_BINARY_ARRAY_TAG_SIZE = 5;

    // Integers are stored in the smallest size that will hold them.
    // The unsigned maxima are derived from the signed ones: 2 * MAX + 1.
    private static final long MAX_INT = Integer.MAX_VALUE;
    private static final long MAX_UINT = MAX_INT * 2 + 1;
    private static final long MAX_SHORT = Short.MAX_VALUE;
    private static final long MAX_USHORT = MAX_SHORT * 2 + 1;
    private static final long MAX_BYTE = Byte.MAX_VALUE;
    private static final long MAX_UBYTE = MAX_BYTE * 2 + 1;

    // Source or sink for disk representation.
    final BinaryCodec binaryCodec;

    /**
     * For writing tags.
     * For reading tags, a BinaryCodec is not used. See readTags() below.
     *
     * @param binaryCodec where to write the file rep of the tags
     */
    public BinaryTagCodec(final BinaryCodec binaryCodec) {
        this.binaryCodec = binaryCodec;
    }

    /**
     * @param attributeValue In-memory representation of a tag value.
     * @return Size in bytes to store the value on disk.
     * @throws IllegalArgumentException if the value's type (or integer range) cannot be encoded.
     */
    private static int getBinaryValueSize(final Object attributeValue) {
        // Note: there is no 'H' case here because getTagValueType() never returns 'H'.
        switch (getTagValueType(attributeValue)) {
            case 'Z':
                // String length plus one extra byte (presumably the null terminator that the
                // read side expects -- see readNullTerminatedString()).
                return ((String) attributeValue).length() + 1;
            case 'A':
                return 1;
            case 'I':
            case 'i':
                return 4;
            case 's':
            case 'S':
                return 2;
            case 'c':
            case 'C':
                return 1;
            case 'f':
                return 4;
            case 'B': {
                final int numElements = Array.getLength(attributeValue);
                final int elementSize;
                if (attributeValue instanceof byte[]) {
                    elementSize = 1;
                } else if (attributeValue instanceof short[]) {
                    elementSize = 2;
                } else if (attributeValue instanceof int[]) {
                    elementSize = 4;
                } else if (attributeValue instanceof float[]) {
                    elementSize = 4;
                } else {
                    throw new IllegalArgumentException("Unsupported array type: " + attributeValue.getClass());
                }
                // Element payload plus the array-type byte and the 4-byte element count.
                return numElements * elementSize + FIXED_BINARY_ARRAY_TAG_SIZE;
            }
            default:
                throw new IllegalArgumentException("When writing BAM, unrecognized tag type " +
                        attributeValue.getClass().getName());
        }
    }

    /**
     * @param value In-memory representation of a tag value.
     * @return Size in bytes to store the tag name, tag type and tag value on disk.
     * @throws IllegalArgumentException if the value's type (or integer range) cannot be encoded.
     */
    static int getTagSize(final Object value) {
        return FIXED_TAG_SIZE + getBinaryValueSize(value);
    }

    /**
     * @param value In-memory representation of a tag value.
     * @return One-character disk representation of tag type.
     * @throws IllegalArgumentException if the value is of an unsupported type, or is an integer that
     *                                  does not fit in 32 bits (signed or unsigned).
     */
    static char getTagValueType(final Object value) {
        if (value instanceof String) {
            return 'Z';
        } else if (value instanceof Character) {
            return 'A';
        } else if (value instanceof Float) {
            return 'f';
        } else if (value instanceof Number) {
            // Only the integral boxed types are accepted; e.g. Double is rejected.
            if (!(value instanceof Byte || value instanceof Short || value instanceof Integer || value instanceof Long)) {
                throw new IllegalArgumentException("Unrecognized tag type " + value.getClass().getName());
            }
            return getIntegerType(((Number) value).longValue());
        } else if (value instanceof byte[] || value instanceof short[] || value instanceof int[] || value instanceof float[]) {
            // Note that the H tag type is never written anymore, because B style is more compact;
            // a byte[] is therefore reported as 'B' like every other supported array.
            return 'B';
        } else {
            throw new IllegalArgumentException("When writing BAM, unrecognized tag type " +
                    value.getClass().getName());
        }
    }

    /**
     * @param val Integer tag value.
     * @return Tag type corresponding to the smallest integer type that will hold the given value.
     * @throws IllegalArgumentException if the value is larger than an unsigned 32-bit integer or
     *                                  smaller than a signed 32-bit integer.
     */
    private static char getIntegerType(final long val) {
        if (val > MAX_UINT) {
            throw new IllegalArgumentException("Integer attribute value too large to be encoded in BAM");
        }
        // Walk down from the widest type to the narrowest; the first threshold exceeded decides.
        if (val > MAX_INT) {
            return 'I';
        }
        if (val > MAX_USHORT) {
            return 'i';
        }
        if (val > MAX_SHORT) {
            return 'S';
        }
        if (val > MAX_UBYTE) {
            return 's';
        }
        if (val > MAX_BYTE) {
            return 'C';
        }
        // From here on val <= Byte.MAX_VALUE; pick the narrowest signed type that holds it.
        if (val >= Byte.MIN_VALUE) {
            return 'c';
        }
        if (val >= Short.MIN_VALUE) {
            return 's';
        }
        if (val >= Integer.MIN_VALUE) {
            return 'i';
        }
        throw new IllegalArgumentException("Integer attribute value too negative to be encoded in BAM");
    }

    /**
     * Write the given tag name and value to disk.
     *
     * @param tag             Binary tag name, written as a 16-bit short.
     * @param value           In-memory tag value; its Java type and (for integers) magnitude select the
     *                        on-disk type via {@link #getTagValueType(Object)}.
     * @param isUnsignedArray Only consulted when value is an array: selects the unsigned element type
     *                        code for byte/short/int arrays. Ignored for float arrays.
     * @throws IllegalArgumentException if the value cannot be encoded.
     */
    public void writeTag(final short tag, final Object value, final boolean isUnsignedArray) {
        binaryCodec.writeShort(tag);
        final char tagValueType = getTagValueType(value);
        binaryCodec.writeByte(tagValueType);
        // Integer cases go through Number rather than a concrete boxed type, because the type code
        // is chosen from the numeric magnitude alone: e.g. a Short or Long holding 200 is typed 'C'.
        switch (tagValueType) {
            case 'Z':
                binaryCodec.writeString((String) value, false, true);
                break;
            case 'A':
                binaryCodec.writeByte(((Character) value));
                break;
            case 'I':
                binaryCodec.writeUInt(((Number) value).longValue());
                break;
            case 'i':
                binaryCodec.writeInt(((Number) value).intValue());
                break;
            case 's':
                binaryCodec.writeShort(((Number) value).shortValue());
                break;
            case 'S':
                binaryCodec.writeUShort(((Number) value).intValue());
                break;
            case 'c':
                binaryCodec.writeByte(((Number) value).byteValue());
                break;
            case 'C':
                binaryCodec.writeUByte(((Number) value).shortValue());
                break;
            case 'f':
                binaryCodec.writeFloat((Float) value);
                break;
            // Writing 'H' is no longer supported; getTagValueType() never returns it.
            case 'B':
                writeArray(value, isUnsignedArray);
                break;
            default:
                throw new IllegalArgumentException("When writing BAM, unrecognized tag type " +
                        value.getClass().getName());
        }
    }

    /**
     * Write the value part of a 'B' tag: element type code, element count, then the elements.
     * The unsigned flag only changes the type code that is written; the element values are
     * written the same way in both cases.
     *
     * @param value           A byte[], short[], int[] or float[].
     * @param isUnsignedArray Whether to write the upper-case (unsigned) element type code.
     *                        Ignored for float arrays.
     * @throws SAMException if value is not one of the supported array types.
     */
    private void writeArray(final Object value, final boolean isUnsignedArray) {
        if (value instanceof byte[]) {
            binaryCodec.writeByte(isUnsignedArray ? 'C' : 'c');
            final byte[] array = (byte[]) value;
            binaryCodec.writeInt(array.length);
            for (final byte element : array) {
                binaryCodec.writeByte(element);
            }
        } else if (value instanceof short[]) {
            binaryCodec.writeByte(isUnsignedArray ? 'S' : 's');
            final short[] array = (short[]) value;
            binaryCodec.writeInt(array.length);
            for (final short element : array) {
                binaryCodec.writeShort(element);
            }
        } else if (value instanceof int[]) {
            binaryCodec.writeByte(isUnsignedArray ? 'I' : 'i');
            final int[] array = (int[]) value;
            binaryCodec.writeInt(array.length);
            for (final int element : array) {
                binaryCodec.writeInt(element);
            }
        } else if (value instanceof float[]) {
            binaryCodec.writeByte('f');
            final float[] array = (float[]) value;
            binaryCodec.writeInt(array.length);
            for (final float element : array) {
                binaryCodec.writeFloat(element);
            }
        } else {
            throw new SAMException("Unrecognized array value type: " + value.getClass());
        }
    }

    /**
     * Convert tags from little-endian disk representation to in-memory representation.
     *
     * @param binaryRep           Byte buffer containing file representation of tags.
     * @param offset              Where in binaryRep tags start.
     * @param length              How many bytes in binaryRep are tag storage.
     * @param validationStringency Passed through to the value readers; used when an unsigned integer
     *                            value fails validation.
     * @return Head of the linked list of decoded tags, or null if the given range contains no tags.
     * @throws SAMFormatException if a tag type or array element type is unrecognized, or an array
     *                            length is negative or larger than the remaining data.
     */
    public static SAMBinaryTagAndValue readTags(final byte[] binaryRep, final int offset,
                                                final int length, final ValidationStringency validationStringency) {
        final ByteBuffer byteBuffer = ByteBuffer.wrap(binaryRep, offset, length);
        byteBuffer.order(ByteOrder.LITTLE_ENDIAN);

        SAMBinaryTagAndValue head = null;
        SAMBinaryTagAndValue tail = null;

        while (byteBuffer.hasRemaining()) {
            final short tag = byteBuffer.getShort();
            final byte tagType = byteBuffer.get();
            final SAMBinaryTagAndValue tmp;
            if (tagType != 'B') {
                tmp = new SAMBinaryTagAndValue(tag, readSingleValue(tagType, byteBuffer, validationStringency));
            } else {
                final TagValueAndUnsignedArrayFlag valueAndFlag = readArray(byteBuffer, validationStringency);
                if (valueAndFlag.isUnsignedArray) {
                    tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, valueAndFlag.value);
                } else {
                    tmp = new SAMBinaryTagAndValue(tag, valueAndFlag.value);
                }
            }

            // If htsjdk wrote the BAM then the attributes will be in lowest->highest tag order, so inserting at
            // the head each time will be very inefficient. To fix that we check here to see if the tag should go
            // right on the tail and if so stick it there, else insert it through the head.
            if (head == null) {
                head = tmp;
                tail = tmp;
            } else if (tmp.tag > tail.tag) {
                tail.insert(tmp);
                tail = tmp;
            } else {
                head = head.insert(tmp);
            }
        }

        return head;
    }

    /**
     * Read value of specified non-array type.
     *
     * @param tagType              What type to read.
     * @param byteBuffer           Little-endian byte buffer to read value from.
     * @param validationStringency How to report an unsigned 32-bit value that fails validation.
     * @return Value in in-memory Object form: a String for 'Z', a Character for 'A', a Float for 'f',
     *         an Integer for 'i', 's', 'S', 'c' and 'C', an Integer or (when it does not fit) a Long
     *         for 'I', and the result of StringUtil.hexStringToBytes for 'H'.
     * @throws SAMFormatException if tagType is not one of the recognized type codes.
     */
    private static Object readSingleValue(final byte tagType, final ByteBuffer byteBuffer,
                                          final ValidationStringency validationStringency) {
        switch (tagType) {
            case 'Z':
                return readNullTerminatedString(byteBuffer);
            case 'A':
                return (char) byteBuffer.get();
            case 'I':
                // Mask to reinterpret the 32 bits read as an unsigned value held in a long.
                final long val = byteBuffer.getInt() & 0xffffffffL;
                if (val <= Integer.MAX_VALUE) {
                    return (int) val;
                }
                // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer,
                // return it directly as a long
                if (!SAMUtils.isValidUnsignedIntegerAttribute(val)) {
                    SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
                            "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency);
                }
                return val;
            case 'i':
                return byteBuffer.getInt();
            case 's':
                return (int) byteBuffer.getShort();
            case 'S':
                // Convert to unsigned short stored in an int
                return byteBuffer.getShort() & 0xffff;
            case 'c':
                return (int) byteBuffer.get();
            case 'C':
                // Convert to unsigned byte stored in an int
                return (int) byteBuffer.get() & 0xff;
            case 'f':
                return byteBuffer.getFloat();
            case 'H':
                // 'H' is still readable even though it is never written.
                final String hexRep = readNullTerminatedString(byteBuffer);
                return StringUtil.hexStringToBytes(hexRep);
            default:
                throw new SAMFormatException("Unrecognized tag type: " + (char) tagType);
        }
    }

    /**
     * Read the value part of a 'B' (array) tag: element type code, element count, then the elements.
     *
     * @param byteBuffer           Little-endian byte buffer to read value from.
     * @param validationStringency Currently unused by this method.
     * @return CVO containing the value in in-memory Object form (byte[], short[], int[] or float[]),
     *         and a flag indicating whether it is unsigned or not (i.e. the type code was upper case).
     * @throws SAMFormatException if the element type code is unrecognized, or the element count is
     *                            negative or needs more bytes than remain in the buffer.
     */
    private static TagValueAndUnsignedArrayFlag readArray(final ByteBuffer byteBuffer,
                                                          final ValidationStringency validationStringency) {
        final byte arrayType = byteBuffer.get();
        final boolean isUnsigned = Character.isUpperCase(arrayType);
        final int length = byteBuffer.getInt();
        final Object value;
        // Signed and unsigned variants share a storage type, so dispatch on the lower-cased code.
        switch (Character.toLowerCase(arrayType)) {
            case 'c': {
                checkArrayLength(byteBuffer, length, 1);
                final byte[] array = new byte[length];
                value = array;
                byteBuffer.get(array);
                break;
            }
            case 's': {
                checkArrayLength(byteBuffer, length, 2);
                final short[] array = new short[length];
                value = array;
                for (int i = 0; i < length; ++i) {
                    array[i] = byteBuffer.getShort();
                }
                break;
            }
            case 'i': {
                checkArrayLength(byteBuffer, length, 4);
                final int[] array = new int[length];
                value = array;
                for (int i = 0; i < length; ++i) {
                    array[i] = byteBuffer.getInt();
                }
                break;
            }
            case 'f': {
                checkArrayLength(byteBuffer, length, 4);
                final float[] array = new float[length];
                value = array;
                for (int i = 0; i < length; ++i) {
                    array[i] = byteBuffer.getFloat();
                }
                break;
            }
            default:
                throw new SAMFormatException("Unrecognized tag array type: " + (char) arrayType);
        }
        return new TagValueAndUnsignedArrayFlag(value, isUnsigned);
    }

    /**
     * Reject an array element count that is negative or that needs more bytes than remain in the
     * buffer, before any array is allocated for it. The count comes straight from the file, so
     * without this check a corrupt value could trigger a NegativeArraySizeException or a very large
     * allocation.
     *
     * @param byteBuffer  Buffer positioned at the first array element.
     * @param length      Element count read from the buffer.
     * @param elementSize Size in bytes of one element.
     * @throws SAMFormatException if length is negative or the elements do not fit in the buffer.
     */
    private static void checkArrayLength(final ByteBuffer byteBuffer, final int length, final int elementSize) {
        // Multiply in long so that length * elementSize cannot overflow.
        if (length < 0 || (long) length * elementSize > byteBuffer.remaining()) {
            throw new SAMFormatException("Invalid tag array length " + length + ": " +
                    byteBuffer.remaining() + " byte(s) remain for elements of " + elementSize + " byte(s) each");
        }
    }

    /**
     * Read a null-terminated string starting at the buffer's current position, leaving the buffer
     * positioned just past the terminator. If no zero byte is found before the buffer's limit, the
     * underlying ByteBuffer.get() call throws BufferUnderflowException.
     *
     * @param byteBuffer Buffer to read from.
     * @return The string, not including the null terminator.
     */
    private static String readNullTerminatedString(final ByteBuffer byteBuffer) {
        // Count the number of bytes in the string
        byteBuffer.mark();
        final int startPosition = byteBuffer.position();
        while (byteBuffer.get() != 0) {
            // Just advance to the terminator.
        }
        final int endPosition = byteBuffer.position();

        // Don't count null terminator
        final byte[] buf = new byte[endPosition - startPosition - 1];
        // Go back to the start of the string and read out the bytes
        byteBuffer.reset();
        byteBuffer.get(buf);
        // Skip over the null terminator
        byteBuffer.get();
        return StringUtil.bytesToString(buf);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy