All downloads are free. Search and download functionality uses the official Maven repository.

htsjdk.samtools.TextTagCodec Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package htsjdk.samtools;

import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.Iso8601Date;
import htsjdk.samtools.util.StringUtil;

import java.lang.reflect.Array;
import java.text.DateFormat;
import java.text.ParseException;
import java.util.Date;
import java.util.Map;

/**
 * Converter between SAM text representation of a tag, and in-memory Object representation.
 * Note that this class is not thread-safe: {@link #decode(String)} writes into a scratch array
 * that is an instance field (kept there to reduce object creation), so the same instance must
 * never be used from multiple threads at once.
 */
public class TextTagCodec {
    // A typed tag splits into 3 fields (name:type:value) when the value is non-empty,
    // and into only 2 fields when the value is the empty string.
    private static final int NUM_TAG_FIELDS = 3;

    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    /**
     * This is really a local variable of decode(), but allocated here to reduce allocations.
     */
    private final String[] fields = new String[NUM_TAG_FIELDS];

    /**
     * Convert in-memory representation of tag to SAM text representation.
     * The type codes c, C, s, S and I are all written as the single text type i.
     *
     * @param tagName Two-character tag name.
     * @param value Tag value as appropriate Object subclass.
     * @return SAM text String representation, i.e. name:type:value
     * @throws IllegalArgumentException if an integer value lies outside [-2^31, 2^32), or if an
     *         array value has an unsupported component type.
     */
    public String encode(final String tagName, final Object value) {
        final StringBuilder sb = new StringBuilder(tagName);
        sb.append(':');
        char tagType = BinaryTagCodec.getTagValueType(value);
        switch (tagType) {
            case 'c':
            case 'C':
            case 's':
            case 'S':
            case 'I':
                // Text format has one integer type regardless of width/signedness.
                tagType = 'i';
                break;
            default:
                break;
        }

        final Object textValue;
        if (tagType == 'H') {
            // H should never happen anymore.
            textValue = StringUtil.bytesToHexString((byte[]) value);
        } else if (tagType == 'B') {
            // Array text form is <element type>,<v1>,<v2>,...
            textValue = getArrayType(value, false) + encodeArrayValue(value);
        } else {
            if (tagType == 'i') {
                final long longVal = ((Number) value).longValue();
                // as the spec says: [-2^31, 2^32)
                if (longVal < Integer.MIN_VALUE || longVal > BinaryCodec.MAX_UINT) {
                    throw new IllegalArgumentException("Value for tag " + tagName + " cannot be stored in either a signed or unsigned 32-bit integer: " + longVal);
                }
            }
            textValue = value;
        }
        sb.append(tagType).append(':').append(textValue.toString());
        return sb.toString();
    }

    /**
     * Determine the SAM array element type character for a primitive array.
     *
     * @param array a float[], byte[], short[] or int[].
     * @param isUnsigned true to get the upper-case (unsigned) variant of an integral type.
     * @return 'f' for float[]; 'c', 's' or 'i' for byte[], short[] or int[] (upper-cased when
     *         isUnsigned is true).
     * @throws IllegalArgumentException if the component type is not one of the above, or if an
     *         unsigned float array is requested.
     */
    private static char getArrayType(final Object array, final boolean isUnsigned) {
        final Class<?> componentType = array.getClass().getComponentType();
        if (componentType == Float.TYPE) {
            if (isUnsigned) {
                throw new IllegalArgumentException("float array cannot be unsigned");
            }
            return 'f';
        }

        final char type;
        if (componentType == Byte.TYPE) {
            type = 'c';
        } else if (componentType == Short.TYPE) {
            type = 's';
        } else if (componentType == Integer.TYPE) {
            type = 'i';
        } else {
            throw new IllegalArgumentException("Unrecognized array type " + componentType);
        }
        return isUnsigned ? Character.toUpperCase(type) : type;
    }

    /**
     * Render every element of an array as text, each one preceded by a comma
     * (so the result for {1, 2} is ",1,2" and the result for an empty array is "").
     *
     * @param value any array, accessed reflectively.
     * @return the comma-prefixed element list.
     */
    private static String encodeArrayValue(final Object value) {
        final int length = Array.getLength(value);
        final StringBuilder ret = new StringBuilder();
        for (int i = 0; i < length; ++i) {
            ret.append(',');
            ret.append(Array.get(value, i).toString());
        }
        return ret.toString();
    }

    /**
     * Reinterpret the elements of a byte[], short[] or int[] as unsigned values.
     *
     * @param array a byte[], short[] or int[].
     * @return a long[] of the same length holding each element masked to its original bit width.
     * @throws IllegalArgumentException if the component type is not byte, short or int.
     */
    private static long[] widenToUnsigned(final Object array) {
        final Class<?> componentType = array.getClass().getComponentType();
        final long mask;
        if (componentType == Byte.TYPE) {
            mask = 0xffL;
        } else if (componentType == Short.TYPE) {
            mask = 0xffffL;
        } else if (componentType == Integer.TYPE) {
            mask = 0xffffffffL;
        } else {
            throw new IllegalArgumentException("Unrecognized unsigned array type " + componentType);
        }

        final long[] ret = new long[Array.getLength(array)];
        for (int i = 0; i < ret.length; ++i) {
            // Array.getLong sign-extends; the mask strips the extension bits again.
            ret[i] = Array.getLong(array, i) & mask;
        }
        return ret;
    }

    /**
     * Encode an integral array whose elements are to be interpreted as unsigned.
     *
     * @param tagName tag name to prefix the result with.
     * @param array a byte[], short[] or int[].
     * @return text of the form name:B:T,v1,v2,... where T is C, S or I.
     * @throws IllegalArgumentException if array is not an array or has an unsupported component type.
     */
    String encodeUnsignedArray(final String tagName, final Object array) {
        if (!array.getClass().isArray()) {
            throw new IllegalArgumentException("Non-array passed to encodeUnsignedArray: " + array.getClass());
        }
        final long[] widened = widenToUnsigned(array);
        return tagName + ":B:" + getArrayType(array, true) + encodeArrayValue(widened);
    }

    /**
     * Encode a standard header tag, which should not have a type field.
     * @param tagName 2-character String.
     * @param value Not necessarily a String.  Some of these are integers but the type is implied by
     * the tagName.  Converted to String with toString().
     * @return Colon-separated text representation suitable for a SAM header, i.e. name:value.
     */
    public String encodeUntypedTag(final String tagName, final Object value) {
        return new StringBuilder(tagName).append(':')
                .append(value.toString()).toString();
    }

    /**
     * Convert typed tag in SAM text format (name:type:value) into tag name and Object value representation.
     * A tag with only two fields (name:type) is treated as having an empty value.
     *
     * @param tag SAM text format name:type:value tag.
     * @return Tag name as 2-character String, and tag value in appropriate class based on tag type.
     * If value is an unsigned array, then the value is a TagValueAndUnsignedArrayFlag object.
     * The returned entry is read-only: setValue throws UnsupportedOperationException.
     * @throws SAMFormatException if the tag has too few fields, an unknown type, or a value that
     *         does not match its type.
     */
    public Map.Entry<String, Object> decode(final String tag) {
        final int numFields = StringUtil.splitConcatenateExcessTokens(tag, fields, ':');
        if (numFields != TextTagCodec.NUM_TAG_FIELDS && numFields != TextTagCodec.NUM_TAG_FIELDS - 1) {
            throw new SAMFormatException("Not enough fields in tag '" + tag + "'");
        }
        // Copy out of the shared scratch array before it is overwritten by the next call.
        final String key = fields[0];
        final String type = fields[1];
        final String stringVal = numFields == TextTagCodec.NUM_TAG_FIELDS ? fields[2] : "";
        final Object val = convertStringToObject(type, stringVal);
        return new Map.Entry<String, Object>() {
            @Override
            public String getKey() {
                return key;
            }

            @Override
            public Object getValue() {
                return val;
            }

            @Override
            public Object setValue(final Object o) {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Convert the text of a tag value into an object according to its one-letter type.
     *
     * @param type one of Z, A, i, f, H or B.
     * @param stringVal the value text (may be empty).
     * @return String for Z; Character for A; Integer (or Long when the value only fits an
     *         unsigned 32-bit integer) for i; Float for f; the result of
     *         StringUtil.hexStringToBytes for H; an array or TagValueAndUnsignedArrayFlag for B.
     * @throws SAMFormatException if the type is unknown or the value is malformed for the type.
     */
    private static Object convertStringToObject(final String type, final String stringVal) {
        if (type.equals("Z")) {
            return stringVal;
        }
        if (type.equals("A")) {
            if (stringVal.length() != 1) {
                throw new SAMFormatException("Tag of type A should have a single-character value");
            }
            return stringVal.charAt(0);
        }
        if (type.equals("i")) {
            final long lValue;
            try {
                lValue = Long.parseLong(stringVal);
            } catch (NumberFormatException e) {
                throw new SAMFormatException("Tag of type i should have signed decimal value");
            }

            // Prefer Integer; fall back to Long only for values that need unsigned 32-bit range.
            if (lValue >= Integer.MIN_VALUE && lValue <= Integer.MAX_VALUE) {
                return (int) lValue;
            }
            if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
                return lValue;
            }
            throw new SAMFormatException("Integer is out of range for both a 32-bit signed and unsigned integer: " + stringVal);
        }
        if (type.equals("f")) {
            try {
                return Float.parseFloat(stringVal);
            } catch (NumberFormatException e) {
                throw new SAMFormatException("Tag of type f should have single-precision floating point value");
            }
        }
        if (type.equals("H")) {
            try {
                return StringUtil.hexStringToBytes(stringVal);
            } catch (NumberFormatException e) {
                throw new SAMFormatException("Tag of type H should have valid hex string with even number of digits");
            }
        }
        if (type.equals("B")) {
            return convertStringArrayToObject(stringVal);
        }
        throw new SAMFormatException("Unrecognized tag type: " + type);
    }

    /**
     * Convert the text of a B (array) tag value, i.e. "T,v1,v2,...", into a primitive array.
     *
     * @param stringVal element type character followed by zero or more comma-separated values.
     * @return float[] for f; byte[], short[] or int[] for c, s or i; a TagValueAndUnsignedArrayFlag
     *         wrapping byte[], short[] or int[] for C, S or I.
     * @throws SAMFormatException if the element type is not a single recognized character, or an
     *         element is not parseable or is out of range for the element type.
     */
    private static Object convertStringArrayToObject(final String stringVal) {
        // Slot 0 receives the element type, slot 1 the remaining comma-separated values
        // (left null when there are none).
        final String[] elementTypeAndValue = new String[2];
        StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ',');

        if (elementTypeAndValue[0].length() != 1) {
            throw new SAMFormatException("Unrecognized element type for array tag value: " + elementTypeAndValue[0]);
        }
        final char elementType = elementTypeAndValue[0].charAt(0);

        final String[] stringValues =
                elementTypeAndValue[1] != null ? elementTypeAndValue[1].split(",") : EMPTY_STRING_ARRAY;

        if (elementType == 'f') {
            return parseFloatElements(stringValues);
        }

        final boolean isUnsigned = Character.isUpperCase(elementType);
        final char baseType = Character.toLowerCase(elementType);
        final long minValue;
        final long maxValue;
        switch (baseType) {
            case 'c':
                minValue = isUnsigned ? 0 : Byte.MIN_VALUE;
                maxValue = isUnsigned ? 0xffL : Byte.MAX_VALUE;
                break;
            case 's':
                minValue = isUnsigned ? 0 : Short.MIN_VALUE;
                maxValue = isUnsigned ? 0xffffL : Short.MAX_VALUE;
                break;
            case 'i':
                minValue = isUnsigned ? 0 : Integer.MIN_VALUE;
                maxValue = isUnsigned ? 0xffffffffL : Integer.MAX_VALUE;
                break;
            default:
                throw new SAMFormatException("Unrecognized array tag element type: " + elementType);
        }

        final long[] longValues = parseIntegralElements(stringValues, elementType, minValue, maxValue);

        // Narrowing casts keep the low-order bits, so unsigned values above the signed maximum
        // are stored as their two's-complement bit pattern and flagged as unsigned.
        switch (baseType) {
            case 'c': {
                final byte[] array = new byte[longValues.length];
                for (int i = 0; i < longValues.length; ++i) {
                    array[i] = (byte) longValues[i];
                }
                if (isUnsigned) {
                    return new TagValueAndUnsignedArrayFlag(array, true);
                }
                return array;
            }
            case 's': {
                final short[] array = new short[longValues.length];
                for (int i = 0; i < longValues.length; ++i) {
                    array[i] = (short) longValues[i];
                }
                if (isUnsigned) {
                    return new TagValueAndUnsignedArrayFlag(array, true);
                }
                return array;
            }
            case 'i': {
                final int[] array = new int[longValues.length];
                for (int i = 0; i < longValues.length; ++i) {
                    array[i] = (int) longValues[i];
                }
                if (isUnsigned) {
                    return new TagValueAndUnsignedArrayFlag(array, true);
                }
                return array;
            }
            default:
                // Unreachable: the first switch already rejected every other type.
                throw new SAMFormatException("Unrecognized array tag element type: " + elementType);
        }
    }

    /**
     * Parse each element of a B:f array.
     *
     * @param stringValues the individual element strings.
     * @return the parsed values, in order.
     * @throws SAMFormatException if any element is not parseable as a float.
     */
    private static float[] parseFloatElements(final String[] stringValues) {
        final float[] ret = new float[stringValues.length];
        for (int i = 0; i < stringValues.length; ++i) {
            try {
                ret[i] = Float.parseFloat(stringValues[i]);
            } catch (NumberFormatException e) {
                throw new SAMFormatException("Array tag of type f should have single-precision floating point value");
            }
        }
        return ret;
    }

    /**
     * Parse each element of an integral B array and check it against the allowed range.
     *
     * @param stringValues the individual element strings.
     * @param elementType element type character as it appeared in the tag; used in error messages only.
     * @param minValue smallest allowed value, inclusive.
     * @param maxValue largest allowed value, inclusive.
     * @return the parsed values, in order.
     * @throws SAMFormatException if any element is not an integer or lies outside [minValue, maxValue].
     */
    private static long[] parseIntegralElements(final String[] stringValues, final char elementType,
                                                final long minValue, final long maxValue) {
        final long[] longValues = new long[stringValues.length];
        for (int i = 0; i < stringValues.length; ++i) {
            final long longValue;
            try {
                longValue = Long.parseLong(stringValues[i]);
            } catch (NumberFormatException e) {
                throw new SAMFormatException("Array tag of type " + elementType + " should have integral value");
            }
            if (longValue < minValue || longValue > maxValue) {
                throw new SAMFormatException("Value for element of array tag of type " + elementType +
                " is out of allowed range: " + longValue);
            }
            longValues[i] = longValue;
        }
        return longValues;
    }

    /**
     * Parse a date string, trying progressively more lenient parsers.
     * The string is first handed to the Iso8601Date(String) constructor; if that is rejected,
     * it is parsed with {@link DateFormat#getDateTimeInstance()}; if that also fails, the
     * deprecated {@link Date#Date(String)} constructor is tried as a last resort.
     *
     * @param dateStr text to parse.
     * @return the parsed date.
     * @throws DateParser.InvalidDateException if none of the parsers accepts the string; the cause
     *         is the ParseException from the second attempt.
     */
    Iso8601Date decodeDate(final String dateStr) {
        try {
            return new Iso8601Date(dateStr);
        } catch (DateParser.InvalidDateException ex) {
            try {
                return new Iso8601Date(DateFormat.getDateTimeInstance().parse(dateStr));
            } catch (ParseException e) {
                try {
                    return new Iso8601Date(new Date(dateStr));
                } catch (Exception e1) {
                    throw new DateParser.InvalidDateException("Could not parse as date: " + dateStr, e);
                }
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy