All Downloads are FREE. Search and download functionality uses the official Maven repository.

htsjdk.variant.variantcontext.writer.BCF2Encoder Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.variantcontext.writer;

import htsjdk.variant.bcf2.BCF2Type;
import htsjdk.variant.bcf2.BCF2Utils;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

/**
 * Encoder that serializes typed and raw BCF2 values into an in-memory byte buffer.
 *
 * See #BCFWriter for documentation on this class's role in encoding BCF2 files.
 *
 * Values are accumulated in an internal {@link ByteArrayOutputStream}; call
 * {@link #getRecordBytes()} to retrieve the accumulated bytes and reset the buffer.
 *
 * @author Mark DePristo
 * @since 06/12
 */
public final class BCF2Encoder {
    // TODO -- increase default size?
    public static final int WRITE_BUFFER_INITIAL_SIZE = 16384;

    /** Buffer holding everything encoded since the last call to {@link #getRecordBytes()}. */
    private final ByteArrayOutputStream encodeStream = new ByteArrayOutputStream(WRITE_BUFFER_INITIAL_SIZE);

    // --------------------------------------------------------------------------------
    //
    // Functions to return the data being encoded here
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a copy of all bytes encoded so far and resets the internal buffer,
     * so subsequent encode calls start from an empty buffer.
     *
     * @return the bytes written since the previous call (or since construction)
     */
    public byte[] getRecordBytes() {
        final byte[] bytes = encodeStream.toByteArray();
        encodeStream.reset();
        return bytes;
    }

    // --------------------------------------------------------------------------------
    //
    // Writing typed values (have type byte)
    //
    // --------------------------------------------------------------------------------

    /**
     * Writes a type descriptor of size 0 for the given type.
     *
     * @param type the BCF2 type to record in the descriptor
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedMissing(final BCF2Type type) throws IOException {
        encodeType(0, type);
    }

    /**
     * Writes a single typed value, dispatching on {@code type}.
     *
     * A null {@code value} is written via {@link #encodeTypedMissing(BCF2Type)}. Otherwise the
     * value is cast to Integer (INT8/INT16/INT32), Double (FLOAT) or String (CHAR).
     *
     * @param value the value to encode, or null for missing
     * @param type  the BCF2 type to encode the value as
     * @throws IllegalArgumentException if {@code type} is not one of the handled types
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTyped(final Object value, final BCF2Type type) throws IOException {
        if ( value == null ) {
            encodeTypedMissing(type);
            return;
        }

        switch ( type ) {
            case INT8:
            case INT16:
            case INT32: encodeTypedInt((Integer) value, type); break;
            case FLOAT: encodeTypedFloat((Double) value); break;
            case CHAR:  encodeTypedString((String) value); break;
            default:    throw new IllegalArgumentException("Illegal type encountered " + type);
        }
    }

    /**
     * Writes a single typed integer, using the integer type chosen by
     * {@code BCF2Utils.determineIntegerType} for the value.
     *
     * @param v the integer to encode
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedInt(final int v) throws IOException {
        final BCF2Type type = BCF2Utils.determineIntegerType(v);
        encodeTypedInt(v, type);
    }

    /**
     * Writes a size-1 type descriptor for {@code type} followed by the raw integer value.
     *
     * @param v    the integer to encode
     * @param type the type to encode the integer as
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedInt(final int v, final BCF2Type type) throws IOException {
        encodeType(1, type);
        encodeRawInt(v, type);
    }

    /**
     * Writes a typed string: a CHAR type descriptor sized to the string's byte length,
     * followed by the bytes. A null string is written the same way the byte[] overload
     * writes a null array (a CHAR descriptor of size 0).
     *
     * @param s the string to encode, may be null
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedString(final String s) throws IOException {
        if ( s == null ) {
            encodeTypedString((byte[]) null);
        } else {
            // NOTE(review): getBytes() uses the platform default charset -- confirm this matches
            // what the reading side expects before switching to an explicit charset.
            encodeTypedString(s.getBytes());
        }
    }

    /**
     * Writes a CHAR type descriptor sized to {@code s.length} followed by the bytes of {@code s}.
     * A null array is written as a CHAR descriptor of size 0 with no payload.
     *
     * @param s the bytes to encode, may be null
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedString(final byte[] s) throws IOException {
        if ( s == null ) {
            encodeType(0, BCF2Type.CHAR);
        } else {
            encodeType(s.length, BCF2Type.CHAR);
            // bulk write; equivalent to writing each byte through encodeRawChar
            encodeStream.write(s, 0, s.length);
        }
    }

    /**
     * Writes a size-1 FLOAT type descriptor followed by the raw float value.
     *
     * @param d the value to encode
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTypedFloat(final double d) throws IOException {
        encodeType(1, BCF2Type.FLOAT);
        encodeRawFloat(d);
    }

    /**
     * Writes a typed vector of values: a type descriptor sized to the list, then each raw value.
     *
     * For CHAR and a non-empty list, the list is first collapsed into a single string with
     * {@code BCF2Utils.collapseStringList} and the bytes of that string are what is written
     * (so the encoded size is the byte length of the collapsed string, not the list size).
     *
     * @param v    the values to encode; for CHAR the elements are cast to String
     * @param type the BCF2 type of the elements
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeTyped(final List<?> v, final BCF2Type type) throws IOException {
        List<?> toWrite = v;
        if ( type == BCF2Type.CHAR && !v.isEmpty() ) {
            @SuppressWarnings("unchecked") // same cast the untyped code performed implicitly
            final List<String> strings = (List<String>) v;
            toWrite = stringToBytes(BCF2Utils.collapseStringList(strings));
        }

        encodeType(toWrite.size(), type);
        encodeRawValues(toWrite, type);
    }

    // --------------------------------------------------------------------------------
    //
    // Writing raw values (don't have a type byte)
    //
    // --------------------------------------------------------------------------------

    /**
     * Writes every element of {@code v} with {@link #encodeRawValue(Object, BCF2Type)},
     * in iteration order.
     *
     * @param v    the values to encode
     * @param type the BCF2 type to encode each value as
     * @throws IOException declared for the underlying write calls
     */
    public final <T> void encodeRawValues(final Collection<T> v, final BCF2Type type) throws IOException {
        for ( final T v1 : v ) {
            encodeRawValue(v1, type);
        }
    }

    /**
     * Writes a single raw value (no type descriptor).
     *
     * If {@code value} is the same reference as {@code type.getMissingJavaValue()} the type's
     * missing value is written; otherwise the value is cast to Integer (INT8/INT16/INT32),
     * Double (FLOAT) or Byte (CHAR) and written.
     *
     * @param value the value to encode
     * @param type  the BCF2 type to encode the value as
     * @throws IllegalArgumentException if {@code type} is not one of the handled types
     * @throws ClassCastException if {@code value} is not of the Java type required by {@code type};
     *                            the original exception is attached as the cause
     * @throws IOException declared for the underlying write calls
     */
    public final <T> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
        try {
            if ( value == type.getMissingJavaValue() ) {
                encodeRawMissingValue(type);
            } else {
                switch ( type ) {
                    case INT8:
                    case INT16:
                    case INT32: encodeRawBytes((Integer) value, type); break;
                    case FLOAT: encodeRawFloat((Double) value); break;
                    case CHAR:  encodeRawChar((Byte) value); break;
                    default:    throw new IllegalArgumentException("Illegal type encountered " + type);
                }
            }
        } catch ( ClassCastException e ) {
            // ClassCastException has no (message, cause) constructor, so attach the cause explicitly
            final ClassCastException rethrown =
                    new ClassCastException("BUG: invalid type cast to " + type + " from " + value);
            rethrown.initCause(e);
            throw rethrown;
        }
    }

    /**
     * Writes the missing-value bytes of {@code type} (from {@code type.getMissingBytes()}).
     *
     * @param type the BCF2 type whose missing value should be written
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
        encodeRawBytes(type.getMissingBytes(), type);
    }

    /**
     * Writes {@code size} consecutive missing values of {@code type}.
     *
     * @param size the number of missing values to write; nothing is written if not positive
     * @param type the BCF2 type whose missing value should be written
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
        for ( int i = 0; i < size; i++ ) {
            encodeRawMissingValue(type);
        }
    }

    // --------------------------------------------------------------------------------
    //
    // low-level encoders
    //
    // --------------------------------------------------------------------------------

    /**
     * Writes a single byte to the buffer.
     *
     * @param c the byte to write
     * @throws IOException declared for interface consistency with the other encoders
     */
    public final void encodeRawChar(final byte c) throws IOException {
        encodeStream.write(c);
    }

    /**
     * Narrows {@code value} to a float and writes its {@link Float#floatToIntBits(float)}
     * representation as a FLOAT.
     *
     * @param value the value to encode
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeRawFloat(final double value) throws IOException {
        encodeRawBytes(Float.floatToIntBits((float) value), BCF2Type.FLOAT);
    }

    /**
     * Writes the type descriptor for a value of {@code size} elements of {@code type}.
     *
     * When {@code size} is at most {@code BCF2Utils.MAX_INLINE_ELEMENTS} the size is encoded
     * directly in the descriptor byte. Otherwise the descriptor is built with
     * {@code BCF2Utils.OVERFLOW_ELEMENT_MARKER} and the actual size follows as a typed int.
     *
     * @param size the number of elements
     * @param type the BCF2 type of the elements
     * @throws IOException declared for the underlying write calls
     */
    public final void encodeType(final int size, final BCF2Type type) throws IOException {
        if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) {
            final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
            encodeStream.write(typeByte);
        } else {
            final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type);
            encodeStream.write(typeByte);
            // write in the overflow size
            encodeTypedInt(size);
        }
    }

    /**
     * Writes {@code value} to the buffer using {@code type.write}.
     *
     * @param value the integer to write
     * @param type  the BCF2 type that performs the write
     * @throws IOException if {@code type.write} fails
     */
    public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
        type.write(value, encodeStream);
    }

    /**
     * Writes {@code value} to the buffer using {@code type.write}.
     * Behaves identically to {@link #encodeRawInt(int, BCF2Type)}.
     *
     * @param value the integer (or integer bit pattern) to write
     * @param type  the BCF2 type that performs the write
     * @throws IOException if {@code type.write} fails
     */
    public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException {
        type.write(value, encodeStream);
    }

    // --------------------------------------------------------------------------------
    //
    // utility functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Writes exactly {@code sizeToWrite} raw CHAR entries for {@code s}: the first bytes of the
     * string, followed by CHAR missing values if the string has fewer than {@code sizeToWrite}
     * bytes. Bytes beyond {@code sizeToWrite} are not written.
     *
     * @param s           the string to write; must not be null
     * @param sizeToWrite the number of entries to write
     * @throws IOException declared for the underlying write calls
     */
    public void encodeRawString(final String s, final int sizeToWrite) throws IOException {
        // NOTE(review): getBytes() uses the platform default charset -- confirm against the reader.
        final byte[] bytes = s.getBytes();
        for ( int i = 0; i < sizeToWrite; i++ ) {
            if ( i < bytes.length )
                encodeRawChar(bytes[i]);
            else
                encodeRawMissingValue(BCF2Type.CHAR);
        }
    }

    /**
     * Totally generic encoder that examines o, determines the best way to encode it, and encodes it
     *
     * This method is incredibly slow, but it's only used for UnitTests so it doesn't matter
     *
     * For a List, the BCF2 type is determined from the first element only.
     *
     * @param o the value (or non-empty List of values) to encode; must not be null
     * @return the BCF2 type that was used to encode {@code o}
     * @throws IllegalArgumentException if {@code o} is null, is an empty List, or has no native encoding
     * @throws IOException declared for the underlying write calls
     */
    public final BCF2Type encode(final Object o) throws IOException {
        if ( o == null ) throw new IllegalArgumentException("Generic encode cannot deal with null values");

        if ( o instanceof List ) {
            final List<?> values = (List<?>) o;
            // the type is inferred from the first element, so an empty list cannot be typed
            if ( values.isEmpty() )
                throw new IllegalArgumentException("Generic encode cannot determine the type of an empty list");
            final BCF2Type type = determineBCFType(values.get(0));
            encodeTyped(values, type);
            return type;
        } else {
            final BCF2Type type = determineBCFType(o);
            encodeTyped(o, type);
            return type;
        }
    }

    /**
     * Maps a Java value to the BCF2 type used to encode it: Integer to the type chosen by
     * {@code BCF2Utils.determineIntegerType}, String to CHAR, Double to FLOAT. If {@code arg}
     * is a List, its first element is examined instead.
     *
     * @param arg the value (or List whose first element) to classify
     * @return the BCF2 type for the value
     * @throws IllegalArgumentException if the value is null or of an unsupported class
     */
    private BCF2Type determineBCFType(final Object arg) {
        final Object toType = arg instanceof List ? ((List<?>) arg).get(0) : arg;

        if ( toType instanceof Integer )
            return BCF2Utils.determineIntegerType((Integer) toType);
        else if ( toType instanceof String )
            return BCF2Type.CHAR;
        else if ( toType instanceof Double )
            return BCF2Type.FLOAT;
        else {
            // arg can be null here (e.g. a null first list element), so avoid arg.getClass() blindly
            final String typeName = arg == null ? "null" : arg.getClass().getSimpleName();
            throw new IllegalArgumentException("No native encoding for Object of type " + typeName);
        }
    }

    /**
     * Converts a string into a list of its bytes (boxed), or an empty list for null/empty input.
     *
     * @param v the string to convert, may be null
     * @return the bytes of {@code v} as a list
     */
    private List<Byte> stringToBytes(final String v) {
        if ( v == null || v.isEmpty() )
            return Collections.emptyList();

        // TODO -- this needs to be optimized away for efficiency
        final byte[] bytes = v.getBytes();
        final List<Byte> l = new ArrayList<Byte>(bytes.length);
        for ( final byte b : bytes ) l.add(b);
        return l;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy