com.ibm.icu.text.CollatorReader (ICU4J)

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support.

/**
 *******************************************************************************
 * Copyright (C) 1996-2011, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;

import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;

/**
 * Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
 * <p>
 * This class simply reads uca.icu, authenticates that it is a valid ICU data file and splits its contents up into
 * blocks of data for use in com.ibm.icu.text.Collator.
 * <p>
 * uca.icu, which is in big-endian format, is jarred together with this package.
 *
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 */
final class CollatorReader {
    static char[] read(RuleBasedCollator rbc, UCAConstants ucac, LeadByteConstants leadByteConstants,
            Output<Integer> maxUCAContractionLength) throws IOException {
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
        BufferedInputStream b = new BufferedInputStream(i, 90000);
        CollatorReader reader = new CollatorReader(b);
        char[] ucaContractions = reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
        b.close();
        return ucaContractions;
    }

    public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
        return new InputStream() {
            public int read() throws IOException {
                if (!buf.hasRemaining()) {
                    return -1;
                }
                return buf.get() & 0xff;
            }

            public int read(byte[] bytes, int off, int len) throws IOException {
                len = Math.min(len, buf.remaining());
                buf.get(bytes, off, len);
                return len;
            }
        };
    }

    static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
        final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
        int dataLength = data.remaining();
        // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
        // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
        // Consider changing ICUBinary to also work with a ByteBuffer.
        CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
        if (dataLength > MIN_BINARY_DATA_SIZE_) {
            reader.readImp(rbc, null, null, null);
        } else {
            reader.readHeader(rbc, null);
            reader.readOptions(rbc);
            // duplicating UCA_'s data
            rbc.setWithUCATables();
        }
    }

    static InverseUCA getInverseUCA() throws IOException {
        InverseUCA result = null;
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
        // try {
        // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
        // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
        BufferedInputStream b = new BufferedInputStream(i, 110000);
        result = CollatorReader.readInverseUCA(b);
        b.close();
        i.close();
        return result;
        // } catch (Exception e) {
        // throw new RuntimeException(e.getMessage());
        // }
    }
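    // ---------------------------------------------------------------------
    // Illustrative notes (editorial, not part of the original ICU4J source).
    //
    // initRBC() above uses MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2 = 268 bytes as its
    // threshold: a tailored collation binary larger than that is taken to carry its own
    // full tables (readImp), while anything smaller holds only header + options and falls
    // back to the shared UCA tables (setWithUCATables()).
    //
    // A minimal sketch of consuming such a ByteBuffer through the adapter returned by
    // makeByteBufferInputStream(); the variable names are hypothetical:
    //
    //     ByteBuffer collationBytes = ...; // collation binary obtained elsewhere
    //     DataInputStream in = new DataInputStream(
    //             CollatorReader.makeByteBufferInputStream(collationBytes));
    //     int firstWord = in.readInt(); // DataInputStream reads big-endian, matching the data
    // ---------------------------------------------------------------------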

    // protected constructor ---------------------------------------------

    /**
     * Protected constructor.
     * 
     * @param inputStream
     *            ICU collator file input stream
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream) throws IOException {
        this(inputStream, true);
        /*
         * byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
         * // weiv: check that we have the correct Unicode version in binary files
         * VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
         * if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
         *     throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
         * }
         * m_dataInputStream_ = new DataInputStream(inputStream);
         */
    }
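    // Illustrative note (editorial, not part of the original ICU4J source): the two-argument
    // constructor below is where authentication actually happens. ICUBinary.readHeader() checks
    // the format id bytes (DATA_FORMAT_ID_, "UCol" in ASCII) and the format version via
    // UCA_AUTHENTICATE_, and returns the Unicode version the data file was built with; per the
    // check below, its major.minor fields must match the runtime's UCharacter.getUnicodeVersion()
    // exactly. For example (hypothetical versions), a ucadata.icu built for Unicode 6.0 would be
    // rejected with WRONG_UNICODE_VERSION_ERROR_ on a runtime reporting Unicode 6.1.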

    /**
     * Protected constructor.
     * 
     * @param inputStream
     *            ICU collator file input stream
     * @param readICUHeader
     *            flag to indicate if the ICU header has to be read
     * @exception IOException
     *                thrown if the data file fails authentication
     */
    private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
        if (readICUHeader) {
            byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
            // weiv: check that we have the correct Unicode version in
            // binary files
            VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
            if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
                throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
            }
        }
        m_dataInputStream_ = new DataInputStream(inputStream);
    }

    // protected methods -------------------------------------------------

    /**
     * Read and break up the header stream of data passed in as arguments into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate with header information
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readHeader(RuleBasedCollator rbc, Output<Integer> maxUCAContractionLength) throws IOException {
        m_size_ = m_dataInputStream_.readInt();
        // all the offsets are in bytes
        // to get the address add to the header address and cast properly
        // Default options int options
        m_headerSize_ = m_dataInputStream_.readInt(); // start of options
        int readcount = 8; // for size and headersize
        // structure which holds values for indirect positioning and implicit
        // ranges
        m_UCAConstOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // this one is needed only for UCA, to copy the appropriate
        // contractions
        /* int contractionUCACombos = */ m_dataInputStream_.readInt();
        readcount += 4;
        // reserved for future use
        m_dataInputStream_.skipBytes(4);
        readcount += 4;
        // const uint8_t *mappingPosition;
        int mapping = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *expansion;
        rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // UChar *contractionIndex;
        rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        // uint32_t *contractionCEs;
        int contractionCE = m_dataInputStream_.readInt();
        readcount += 4;
        // needed for various closures int contractionSize
        int contractionSize = m_dataInputStream_.readInt();
        readcount += 4;
        // array of last collation element in expansion
        int expansionEndCE = m_dataInputStream_.readInt();
        readcount += 4;
        // array of maximum expansion size corresponding to the expansion
        // collation elements with last element in expansionEndCE
        int expansionEndCEMaxSize = m_dataInputStream_.readInt();
        readcount += 4;
        // size of endExpansionCE int expansionEndCESize
        /* int endExpansionCECount = */ m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of unsafe code points
        int unsafe = m_dataInputStream_.readInt();
        readcount += 4;
        // hash table of final code points in contractions.
        int contractionEnd = m_dataInputStream_.readInt();
        readcount += 4;
        // int CEcount = m_dataInputStream_.readInt();
        int contractionUCACombosSize = m_dataInputStream_.readInt();
        readcount += 4;
        // is jamoSpecial
        rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
        readcount++;
        // isBigEndian and charSetFamily
        m_dataInputStream_.skipBytes(2);
        readcount += 2;
        int contractionUCACombosWidth = m_dataInputStream_.readByte();
        if (maxUCAContractionLength != null) {
            maxUCAContractionLength.value = contractionUCACombosWidth;
        }
        // We want to be able to output this value if it's not 0.
        assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
        readcount += 1;
        rbc.m_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        /* VersionInfo formatVersion = */ readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
        readcount += 4;
        // byte charsetName[] = new byte[32]; // for charset CEs
        m_dataInputStream_.skipBytes(32);
        readcount += 32;
        m_dataInputStream_.skipBytes(44); // for future use
        readcount += 44;
        if (m_headerSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Header size error");
            // /CLOVER:ON
        }
        m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
        if (rbc.m_contractionOffset_ == 0) { // contraction can be null
            rbc.m_contractionOffset_ = mapping;
            contractionCE = mapping;
        }
        m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
        m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
        m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
        m_contractionCESize_ = mapping - contractionCE;
        // m_trieSize_ = expansionEndCE - mapping;
        m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
        m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
        m_unsafeSize_ = contractionEnd - unsafe;
        // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
        m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
        // treat it as normal collator first
        // for normal collator there is no UCA contraction
        // contractions (UChar[contractionSize] + CE[contractionSize])
        m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
        rbc.m_contractionOffset_ >>= 1; // casting to ints
        rbc.m_expansionOffset_ >>= 2; // casting to chars
    }

    /**
     * Read and break up the collation options passed in the stream of data and update the argument Collator with the
     * results
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @exception IOException
     *                thrown when there's a data error.
     */
    private void readOptions(RuleBasedCollator rbc) throws IOException {
        int readcount = 0;
        rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsAlternateHandlingShifted_ =
                (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
        readcount += 4;
        rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
        readcount += 4;
        // rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
        // == RuleBasedCollator.AttributeValue.ON_);
        int defaultIsCaseLevel = m_dataInputStream_.readInt();
        rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        int value = m_dataInputStream_.readInt();
        readcount += 4;
        if (value == RuleBasedCollator.AttributeValue.ON_) {
            value = Collator.CANONICAL_DECOMPOSITION;
        } else {
            value = Collator.NO_DECOMPOSITION;
        }
        rbc.m_defaultDecomposition_ = value;
        rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        m_dataInputStream_.skip(60); // reserved for future use
        readcount += 60;
        m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
        if (m_optionSize_ < readcount) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Option size error");
            // /CLOVER:ON
        }
    }

    /**
     * Read and break up the stream of data passed in as arguments into meaningful Collator data.
     * 
     * @param rbc
     *            RuleBasedCollator to populate
     * @param UCAConst
     *            object to fill up with UCA constants if we are reading the UCA collator, if not use a null
     * @param leadByteConstants
     * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
     * @exception IOException
     *                thrown when there's a data error.
     */
    private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
            RuleBasedCollator.LeadByteConstants leadByteConstants, Output<Integer> maxUCAContractionLength)
            throws IOException {
        char ucaContractions[] = null; // return result
        readHeader(rbc, maxUCAContractionLength);
        // header size has been checked by readHeader
        int readcount = m_headerSize_;
        // option size has been checked by readOptions
        readOptions(rbc);
        readcount += m_optionSize_;
        m_expansionSize_ >>= 2;
        rbc.m_expansion_ = new int[m_expansionSize_];
        for (int i = 0; i < m_expansionSize_; i++) {
            rbc.m_expansion_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionSize_ << 2);
        if (m_contractionIndexSize_ > 0) {
            m_contractionIndexSize_ >>= 1;
            rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
            for (int i = 0; i < m_contractionIndexSize_; i++) {
                rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
            }
            readcount += (m_contractionIndexSize_ << 1);
            m_contractionCESize_ >>= 2;
            rbc.m_contractionCE_ = new int[m_contractionCESize_];
            for (int i = 0; i < m_contractionCESize_; i++) {
                rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
            }
            readcount += (m_contractionCESize_ << 2);
        }
        rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
                    + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();
        m_expansionEndCESize_ >>= 2;
        rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
        for (int i = 0; i < m_expansionEndCESize_; i++) {
            rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionEndCESize_ << 2);
        rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
        for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
            rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_expansionEndCEMaxSizeSize_;
        rbc.m_unsafe_ = new byte[m_unsafeSize_];
        for (int i = 0; i < m_unsafeSize_; i++) {
            rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_unsafeSize_;
        if (UCAConst != null) {
            // we are reading the UCA
            // unfortunately the UCA offset in any collator data is not 0 and
            // only refers to the UCA data
            // m_contractionSize_ -= m_UCAValuesSize_;
            m_contractionSize_ = m_UCAConstOffset_ - readcount;
        } else {
            m_contractionSize_ = m_size_ - readcount;
        }
        rbc.m_contractionEnd_ = new byte[m_contractionSize_];
        for (int i = 0; i < m_contractionSize_; i++) {
            rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_contractionSize_;
        if (UCAConst != null) {
            UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            int readUCAConstcount = 4;
            UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            readcount += readUCAConstcount;
            int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
            assert resultsize == m_UCAcontractionSize_ / 2;
            ucaContractions = new char[resultsize];
            for (int i = 0; i < resultsize; i++) {
                ucaContractions[i] = m_dataInputStream_.readChar();
            }
            readcount += m_UCAcontractionSize_;
        }
        if (leadByteConstants != null) {
            readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
            leadByteConstants.read(m_dataInputStream_);
            readcount += leadByteConstants.getSerializedDataSize();
        }
        if (readcount != m_size_) {
            // /CLOVER:OFF
            throw new IOException("Internal Error: Data file size error");
            // /CLOVER:ON
        }
        return ucaContractions;
    }

    /**
     * Reads in the inverse uca data
     * 
     * @param input
     *            input stream with the inverse uca data
     * @return an object containing the inverse uca data
     * @exception IOException
     *                thrown when error occurs while reading the inverse uca
     */
    private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
        byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
                INVERSE_UCA_AUTHENTICATE_);
        // weiv: check that we have the correct Unicode version in
        // binary files
        VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
        if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
            throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
        CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
        DataInputStream input = new DataInputStream(inputStream);
        input.readInt(); // bytesize
        int tablesize = input.readInt(); // in int size
        int contsize = input.readInt(); // in char size
        input.readInt(); // table in bytes
        input.readInt(); // conts in bytes
        result.m_UCA_version_ = readVersion(input);
        input.skipBytes(8); // skip padding
        int size = tablesize * 3; // one column for each strength
        result.m_table_ = new int[size];
        result.m_continuations_ = new char[contsize];
        for (int i = 0; i < size; i++) {
            result.m_table_[i] = input.readInt();
        }
        for (int i = 0; i < contsize; i++) {
            result.m_continuations_[i] = input.readChar();
        }
        input.close();
        return result;
    }

    /**
     * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
     * 
     * @param input
     *            already instantiated DataInputStream, positioned at the start of four version bytes
     * @return a ready VersionInfo object
     * @throws IOException
     *             thrown when error occurs while reading version bytes
     */
    protected static VersionInfo readVersion(DataInputStream input) throws IOException {
        byte[] version = new byte[4];
        version[0] = input.readByte();
        version[1] = input.readByte();
        version[2] = input.readByte();
        version[3] = input.readByte();
        VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
                (int) version[3]);
        return result;
    }

    // private inner class -----------------------------------------------
    // private variables -------------------------------------------------

    /**
     * Authenticate uca data format version
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
            // Too harsh
            // && version[1] == DATA_FORMAT_VERSION_[1]
            // && version[2] == DATA_FORMAT_VERSION_[2]
            // && version[3] == DATA_FORMAT_VERSION_[3];
        }
    };

    /**
     * Authenticate inverse uca data format version
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                    && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
        }
    };

    /**
     * Data input stream for uca.icu
     */
    private DataInputStream m_dataInputStream_;

    /**
     * File format version and id that this class understands. No guarantees are made if an older version is used.
     */
    private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
    private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c }; // "UCol"

    /**
     * Inverse UCA file format version and id that this class understands.
     * No guarantees are made if an older version is used.
     */
    private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 }; // "InvC"

    /**
     * Wrong unicode version error string
     */
    private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";

    /**
     * Size of expansion table in bytes
     */
    private int m_expansionSize_;

    /**
     * Size of contraction index table in bytes
     */
    private int m_contractionIndexSize_;

    /**
     * Size of contraction table in bytes
     */
    private int m_contractionCESize_;

    /*
     * Size of the Trie in bytes
     */
    // private int m_trieSize_;

    /**
     * Size of the table that contains information about collation elements that end with an expansion
     */
    private int m_expansionEndCESize_;

    /**
     * Size of the table that contains information about the maximum size of collation elements that end with a
     * particular expansion CE corresponding to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;

    /**
     * Size of the option table that contains information about the collation options
     */
    private int m_optionSize_;

    /**
     * Size of the whole data file minus the ICU header
     */
    private int m_size_;

    /**
     * Size of the collation data header
     */
    private int m_headerSize_;

    /**
     * Size of the table that contains information about the "Unsafe" codepoints
     */
    private int m_unsafeSize_;

    /**
     * Size in bytes of the table that contains information about codepoints that end with a contraction
     */
    private int m_contractionSize_;

    /**
     * Size of the table that contains UCA contraction information in bytes
     */
    private int m_UCAcontractionSize_;

    /**
     * Offset of the UCA Const
     */
    private int m_UCAConstOffset_;

    // private methods ---------------------------------------------------
}
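// Illustrative sketch (editorial, not part of the original file): how this package-private
// reader is typically driven from within com.ibm.icu.text. The actual call sites are not
// shown in this file, so the variable names below are assumptions based on the signatures above:
//
//     RuleBasedCollator.UCAConstants ucaConsts = ...;           // filled in by read()
//     RuleBasedCollator.LeadByteConstants leadBytes = ...;      // filled in by read()
//     Output<Integer> maxUcaContraction = new Output<Integer>();
//     char[] ucaContractions =
//             CollatorReader.read(rbc, ucaConsts, leadBytes, maxUcaContraction);
//     CollationParsedRuleBuilder.InverseUCA invUca = CollatorReader.getInverseUCA();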



