com.ibm.icu.text.CollatorReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/**
*******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;
/**
*
* Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
*
*
* This class simply reads uca.icu, authenticates that it is a valid ICU data file and splits its contents up into blocks
* of data for use in com.ibm.icu.text.Collator.
*
*
* uca.icu, which is in big-endian format, is jarred together with this package.
*
*
* @author Syn Wee Quek
* @since release 2.2, April 18 2002
*/
final class CollatorReader {
/**
 * Reads the bundled UCA data file (coll/ucadata.icu) and populates the given collator.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param ucac
 *            receives the UCA constants read from the data
 * @param leadByteConstants
 *            receives the script-to-lead-byte tables
 * @param maxUCAContractionLength
 *            receives the maximum UCA contraction length read from the header
 * @return the UCA contractions array
 * @exception IOException
 *                thrown when the data file is missing or fails authentication
 */
static char[] read(RuleBasedCollator rbc, UCAConstants ucac,
        LeadByteConstants leadByteConstants, Output maxUCAContractionLength)
        throws IOException {
    InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
    BufferedInputStream b = new BufferedInputStream(i, 90000);
    // try/finally so the stream is closed even when readImp throws
    // (the original leaked the stream on any IOException).
    try {
        CollatorReader reader = new CollatorReader(b);
        return reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
    } finally {
        b.close();
    }
}
/**
 * Wraps a ByteBuffer in a read-only InputStream view; reading advances the
 * buffer's position.
 *
 * @param buf
 *            buffer to expose as a stream
 * @return an InputStream that consumes the buffer's remaining bytes
 */
public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
    return new InputStream() {
        public int read() throws IOException {
            if (!buf.hasRemaining()) {
                return -1;
            }
            return buf.get() & 0xff;
        }

        public int read(byte[] bytes, int off, int len) throws IOException {
            if (len == 0) {
                // Per the InputStream contract, a zero-length request reads
                // nothing and returns 0, even at end of stream.
                return 0;
            }
            if (!buf.hasRemaining()) {
                // BUG FIX: end of stream must be signaled with -1. The
                // original returned 0 here, which violates the InputStream
                // contract and can send callers into an infinite read loop.
                return -1;
            }
            len = Math.min(len, buf.remaining());
            buf.get(bytes, off, len);
            return len;
        }
    };
}
/**
 * Initializes a RuleBasedCollator from a tailoring binary held in a ByteBuffer.
 * Small binaries carry only a header and options and reuse the shared UCA
 * tables; larger ones carry their own full table set.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param data
 *            collation binary, positioned at its start
 * @exception IOException
 *                thrown when the data fails authentication or is malformed
 */
static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
    // (42 + 25) ints: anything at or below this size is header + options only.
    final int minBinaryDataSize = (42 + 25) << 2;
    int length = data.remaining();
    // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
    // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
    // Consider changing ICUBinary to also work with a ByteBuffer.
    CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
    if (length <= minBinaryDataSize) {
        // Header/options only: duplicate the UCA's tables for this collator.
        reader.readHeader(rbc, null);
        reader.readOptions(rbc);
        rbc.setWithUCATables();
    } else {
        reader.readImp(rbc, null, null, null);
    }
}
/**
 * Reads the bundled inverse UCA data file (coll/invuca.icu).
 *
 * @return the parsed inverse UCA tables
 * @exception IOException
 *                thrown when the data file is missing or fails authentication
 */
static InverseUCA getInverseUCA() throws IOException {
    InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
    BufferedInputStream b = new BufferedInputStream(i, 110000);
    // try/finally so the stream is closed even when parsing throws
    // (the original leaked both streams on any IOException).
    try {
        return CollatorReader.readInverseUCA(b);
    } finally {
        // Closing the BufferedInputStream also closes the underlying stream.
        b.close();
    }
}
// protected constructor ---------------------------------------------
/**
 * Protected constructor. Reads and authenticates the ICU data-file header
 * before positioning the stream at the collation data.
 *
 * @param inputStream
 *            ICU collator file input stream
 * @exception IOException
 *                thrown if the data file fails authentication
 */
private CollatorReader(InputStream inputStream) throws IOException {
    // Header authentication is performed by the delegated constructor.
    // (A stale commented-out copy of that constructor's body was removed here.)
    this(inputStream, true);
}
/**
 * Protected constructor. Optionally reads and authenticates the ICU data-file
 * header, then wraps the stream for big-endian primitive reads.
 *
 * @param inputStream
 *            ICU collator data file input stream
 * @param readICUHeader
 *            flag to indicate if the ICU header has to be read
 * @exception IOException
 *                thrown if the data file fails authentication
 */
private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
    if (readICUHeader) {
        // Authenticates the data format id and version (via UCA_AUTHENTICATE_)
        // and returns the Unicode version the binary was built against.
        byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
        // weiv: check that we have the correct Unicode version in
        // binary files
        VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
        if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
            throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
    }
    // DataInputStream reads big-endian, matching the on-disk format.
    m_dataInputStream_ = new DataInputStream(inputStream);
}
// protected methods -------------------------------------------------
/**
 * Reads and breaks up the header stream of data passed in as arguments into meaningful Collator data.
 * <p>
 * Side effects: populates this reader's size fields (m_size_, m_headerSize_, m_optionSize_ and the
 * table-size fields derived from consecutive offsets) and the collator's offsets, flags and versions.
 * The reads below are strictly ordered to match the on-disk layout; readcount tracks bytes consumed
 * so the remainder of the header can be skipped.
 *
 * @param rbc
 *            RuleBasedCollator to populate with header information
 * @param maxUCAContractionLength
 *            if non-null, receives the UCA contraction combo width read from the header
 * @exception IOException
 *                thrown when there's a data error.
 */
private void readHeader(RuleBasedCollator rbc, Output maxUCAContractionLength) throws IOException {
    m_size_ = m_dataInputStream_.readInt();
    // all the offsets are in bytes
    // to get the address add to the header address and cast properly
    // Default options int options
    m_headerSize_ = m_dataInputStream_.readInt(); // start of options
    int readcount = 8; // for size and headersize
    // structure which holds values for indirect positioning and implicit
    // ranges
    m_UCAConstOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // this one is needed only for UCA, to copy the appropriate
    // contractions
    /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
    readcount += 4;
    // reserved for future use
    m_dataInputStream_.skipBytes(4);
    readcount += 4;
    // const uint8_t *mappingPosition;
    int mapping = m_dataInputStream_.readInt();
    readcount += 4;
    // uint32_t *expansion;
    rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // UChar *contractionIndex;
    rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // uint32_t *contractionCEs;
    int contractionCE = m_dataInputStream_.readInt();
    readcount += 4;
    // needed for various closures int contractionSize
    int contractionSize = m_dataInputStream_.readInt();
    readcount += 4;
    // array of last collation element in expansion
    int expansionEndCE = m_dataInputStream_.readInt();
    readcount += 4;
    // array of maximum expansion size corresponding to the expansion
    // collation elements with last element in expansionEndCE
    int expansionEndCEMaxSize = m_dataInputStream_.readInt();
    readcount += 4;
    // size of endExpansionCE int expansionEndCESize
    /* int endExpansionCECount = */m_dataInputStream_.readInt();
    readcount += 4;
    // hash table of unsafe code points
    int unsafe = m_dataInputStream_.readInt();
    readcount += 4;
    // hash table of final code points in contractions.
    int contractionEnd = m_dataInputStream_.readInt();
    readcount += 4;
    // int CEcount = m_dataInputStream_.readInt();
    int contractionUCACombosSize = m_dataInputStream_.readInt();
    readcount += 4;
    // is jamoSpecial
    rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
    readcount++;
    // isBigEndian and charSetFamily
    m_dataInputStream_.skipBytes(2);
    readcount += 2;
    // width (in UChars) of each UCA contraction combo entry
    int contractionUCACombosWidth = m_dataInputStream_.readByte();
    if (maxUCAContractionLength != null) {
        maxUCAContractionLength.value = contractionUCACombosWidth;
    }
    // We want to be able to output this value if it's not 0.
    assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
    readcount += 1;
    rbc.m_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
    readcount += 4;
    // byte charsetName[] = new byte[32]; // for charset CEs
    m_dataInputStream_.skipBytes(32);
    readcount += 32;
    m_dataInputStream_.skipBytes(44); // for future use
    readcount += 44;
    if (m_headerSize_ < readcount) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Header size error");
        // /CLOVER:ON
    }
    // skip any trailing header bytes this version does not understand
    m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
    if (rbc.m_contractionOffset_ == 0) { // contraction can be null
        rbc.m_contractionOffset_ = mapping;
        contractionCE = mapping;
    }
    // table sizes are differences between consecutive byte offsets
    m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
    m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
    m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
    m_contractionCESize_ = mapping - contractionCE;
    // m_trieSize_ = expansionEndCE - mapping;
    m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
    m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
    m_unsafeSize_ = contractionEnd - unsafe;
    // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
    m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
    // treat it as normal collator first
    // for normal collator there is no UCA contraction
    // contractions (UChar[contractionSize] + CE[contractionSize])
    m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
    rbc.m_contractionOffset_ >>= 1; // casting to ints
    rbc.m_expansionOffset_ >>= 2; // casting to chars
}
/**
 * Reads and breaks up the collation options passed in the stream of data and updates the argument
 * Collator with the results. Reads 10 ints of option values plus 60 reserved bytes, then skips the
 * remainder of the option table.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @exception IOException
 *                thrown when there's a data error.
 */
private void readOptions(RuleBasedCollator rbc) throws IOException {
    int readcount = 0;
    rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
    readcount += 4;
    rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
    readcount += 4;
    int defaultIsCaseLevel = m_dataInputStream_.readInt();
    rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    int value = m_dataInputStream_.readInt();
    readcount += 4;
    if (value == RuleBasedCollator.AttributeValue.ON_) {
        value = Collator.CANONICAL_DECOMPOSITION;
    } else {
        value = Collator.NO_DECOMPOSITION;
    }
    rbc.m_defaultDecomposition_ = value;
    rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    m_dataInputStream_.skip(60); // reserved for future use
    readcount += 60;
    // FIX: validate the option-table size BEFORE skipping the remainder.
    // The original skipped first; skipBytes() silently ignores a negative
    // count, so the check still fired, but checking first matches the order
    // used in readHeader() and does not rely on that quirk.
    if (m_optionSize_ < readcount) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Option size error");
        // /CLOVER:ON
    }
    m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
}
/**
 * Reads and breaks up the stream of data passed in as arguments into meaningful Collator data.
 * Must be called with the stream positioned immediately after the ICU file header; the reads
 * below are strictly ordered to match the on-disk layout, and readcount is reconciled against
 * m_size_ at the end.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param UCAConst
 *            object to fill up with UCA constants if we are reading the UCA collator, if not use a null
 * @param leadByteConstants
 *            if non-null, receives the script-to-lead-byte tables
 * @param maxUCAContractionLength
 *            if non-null, receives the maximum UCA contraction length from the header
 * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
 * @exception IOException
 *                thrown when there's a data error.
 */
private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
        RuleBasedCollator.LeadByteConstants leadByteConstants,
        Output maxUCAContractionLength) throws IOException {
    char ucaContractions[] = null; // return result
    readHeader(rbc, maxUCAContractionLength);
    // header size has been checked by readHeader
    int readcount = m_headerSize_;
    // option size has been checked by readOptions
    readOptions(rbc);
    readcount += m_optionSize_;
    // expansion table: byte size -> int count
    m_expansionSize_ >>= 2;
    rbc.m_expansion_ = new int[m_expansionSize_];
    for (int i = 0; i < m_expansionSize_; i++) {
        rbc.m_expansion_[i] = m_dataInputStream_.readInt();
    }
    readcount += (m_expansionSize_ << 2);
    if (m_contractionIndexSize_ > 0) {
        // contraction index: byte size -> char count
        m_contractionIndexSize_ >>= 1;
        rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
        for (int i = 0; i < m_contractionIndexSize_; i++) {
            rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
        }
        readcount += (m_contractionIndexSize_ << 1);
        // contraction CEs: byte size -> int count
        m_contractionCESize_ >>= 2;
        rbc.m_contractionCE_ = new int[m_contractionCESize_];
        for (int i = 0; i < m_contractionCESize_; i++) {
            rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_contractionCESize_ << 2);
    }
    rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
    if (!rbc.m_trie_.isLatin1Linear()) {
        throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
                + "latin one data arrays");
    }
    readcount += rbc.m_trie_.getSerializedDataSize();
    // expansion-end CEs: byte size -> int count
    m_expansionEndCESize_ >>= 2;
    rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
    for (int i = 0; i < m_expansionEndCESize_; i++) {
        rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
    }
    readcount += (m_expansionEndCESize_ << 2);
    rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
    for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
        rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_expansionEndCEMaxSizeSize_;
    rbc.m_unsafe_ = new byte[m_unsafeSize_];
    for (int i = 0; i < m_unsafeSize_; i++) {
        rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_unsafeSize_;
    if (UCAConst != null) {
        // we are reading the UCA
        // unfortunately the UCA offset in any collator data is not 0 and
        // only refers to the UCA data
        // m_contractionSize_ -= m_UCAValuesSize_;
        m_contractionSize_ = m_UCAConstOffset_ - readcount;
    } else {
        m_contractionSize_ = m_size_ - readcount;
    }
    rbc.m_contractionEnd_ = new byte[m_contractionSize_];
    for (int i = 0; i < m_contractionSize_; i++) {
        rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_contractionSize_;
    if (UCAConst != null) {
        // UCA constants: pairs of CEs ([0]=primary CE, [1]=continuation)
        // followed by single primary-weight boundaries, in fixed order.
        UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        int readUCAConstcount = 4;
        UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        readcount += readUCAConstcount;
        // remaining chars up to the script-to-lead-byte table are the
        // UCA contractions; size must agree with the header's combo data
        int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
        assert resultsize == m_UCAcontractionSize_ / 2;
        ucaContractions = new char[resultsize];
        for (int i = 0; i < resultsize; i++) {
            ucaContractions[i] = m_dataInputStream_.readChar();
        }
        readcount += m_UCAcontractionSize_;
    }
    if (leadByteConstants != null) {
        // skip any gap before the script/lead-byte tables, then read them
        readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
        leadByteConstants.read(m_dataInputStream_);
        readcount += leadByteConstants.getSerializedDataSize();
    }
    if (readcount != m_size_) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Data file size error");
        // /CLOVER:ON
    }
    return ucaContractions;
}
/**
 * Reads in the inverse uca data. Authenticates the ICU header, checks the
 * Unicode version, then reads the fixed-layout table and continuation arrays.
 *
 * @param inputStream
 *            input stream with the inverse uca data
 * @return an object containing the inverse uca data
 * @exception IOException
 *                thrown when error occurs while reading the inverse uca
 */
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
    byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
            INVERSE_UCA_AUTHENTICATE_);
    // weiv: check that we have the correct Unicode version in
    // binary files
    VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
    if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
        throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
    }
    CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
    DataInputStream input = new DataInputStream(inputStream);
    input.readInt(); // bytesize
    int tablesize = input.readInt(); // in int size
    int contsize = input.readInt(); // in char size
    input.readInt(); // table in bytes
    input.readInt(); // conts in bytes
    result.m_UCA_version_ = readVersion(input);
    input.skipBytes(8); // skip padding
    int size = tablesize * 3; // one column for each strength
    result.m_table_ = new int[size];
    result.m_continuations_ = new char[contsize];
    for (int i = 0; i < size; i++) {
        result.m_table_[i] = input.readInt();
    }
    for (int i = 0; i < contsize; i++) {
        result.m_continuations_[i] = input.readChar();
    }
    // also closes the wrapped inputStream
    input.close();
    return result;
}
/**
 * Reads four bytes from the input and returns a VersionInfo object. Use it to read different
 * collator versions.
 *
 * @param input
 *            already instantiated DataInputStream, positioned at the start of four version bytes
 * @return a ready VersionInfo object
 * @throws IOException
 *             thrown when error occurs while reading version bytes
 */
protected static VersionInfo readVersion(DataInputStream input) throws IOException {
    // Read the four version bytes (major, minor, milli, micro) in stream order.
    byte[] v = new byte[4];
    input.readFully(v);
    return VersionInfo.getInstance(v[0], v[1], v[2], v[3]);
}
// private inner class -----------------------------------------------
// private variables -------------------------------------------------
/**
 * Authenticates the uca data format version: major version must match exactly,
 * minor version must be at least the expected one.
 */
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
    public boolean isDataVersionAcceptable(byte version[]) {
        return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
        // Too harsh
        // && version[1] == DATA_FORMAT_VERSION_[1]
        // && version[2] == DATA_FORMAT_VERSION_[2]
        // && version[3] == DATA_FORMAT_VERSION_[3];
    }
};
/**
 * Authenticates the inverse uca data format version (same policy as
 * UCA_AUTHENTICATE_: exact major, minimum minor).
 */
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
    public boolean isDataVersionAcceptable(byte version[]) {
        return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
    }
};
/**
 * Data input stream for uca.icu (big-endian primitive reads)
 */
private DataInputStream m_dataInputStream_;
/**
 * File format version and id that this class understands. No guarantees are made if an older
 * version is used.
 */
private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
// "UCol" in ASCII
private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
/**
 * Inverse UCA file format version and id that this class understands. No guarantees are made if
 * an older version is used.
 */
private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
// "InvC" in ASCII
private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
/**
 * Wrong unicode version error string
 */
private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
/**
 * Size of expansion table in bytes
 */
private int m_expansionSize_;
/**
 * Size of contraction index table in bytes
 */
private int m_contractionIndexSize_;
/**
 * Size of contraction table in bytes
 */
private int m_contractionCESize_;
/*
 * Size of the Trie in bytes
 */
// private int m_trieSize_;
/**
 * Size of the table that contains information about collation elements that end with an expansion
 */
private int m_expansionEndCESize_;
/**
 * Size of the table that contains information about the maximum size of collation elements that end with a
 * particular expansion CE corresponding to the ones in expansionEndCE
 */
private int m_expansionEndCEMaxSizeSize_;
/**
 * Size of the option table that contains information about the collation options
 */
private int m_optionSize_;
/**
 * Size of the whole data file minus the ICU header
 */
private int m_size_;
/**
 * Size of the collation data header
 */
private int m_headerSize_;
/**
 * Size of the table that contains information about the "Unsafe" codepoints
 */
private int m_unsafeSize_;
/**
 * Size in bytes of the table that contains information about codepoints that end with a contraction
 */
private int m_contractionSize_;
/**
 * Size of the table that contains UCA contraction information in bytes
 */
private int m_UCAcontractionSize_;
/**
 * Offset (in bytes) of the UCA constants block within the data
 */
private int m_UCAConstOffset_;
// private methods ---------------------------------------------------
}