com.ibm.icu.text.CollatorReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/**
*******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;
/**
*
* Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
*
*
* This class simply reads uca.icu, authenticates that it is a valid ICU data file and splits its contents up into blocks
* of data for use in com.ibm.icu.text.Collator.
*
*
* uca.icu, which is in big-endian format, is jarred together with this package.
*
*
* @author Syn Wee Quek
* @since release 2.2, April 18 2002
*/
final class CollatorReader {
/**
 * Reads the bundled UCA data file (coll/ucadata.icu) and populates the given collator.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param ucac
 *            receives the UCA constants read from the data
 * @param leadByteConstants
 *            receives the script-to-lead-byte tables
 * @param maxUCAContractionLength
 *            receives the maximum UCA contraction length read from the header
 * @return the UCA contractions array
 * @exception IOException
 *                thrown when the data file is missing or fails authentication
 */
static char[] read(RuleBasedCollator rbc, UCAConstants ucac,
        LeadByteConstants leadByteConstants, Output maxUCAContractionLength)
        throws IOException {
    InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
    BufferedInputStream b = new BufferedInputStream(i, 90000);
    // try/finally so the stream is closed even when readImp throws
    // (the original leaked the stream on any IOException).
    try {
        CollatorReader reader = new CollatorReader(b);
        return reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength);
    } finally {
        b.close();
    }
}
/**
 * Wraps a ByteBuffer in a read-only InputStream view; reading advances the
 * buffer's position.
 *
 * @param buf
 *            buffer to expose as a stream
 * @return an InputStream that consumes the buffer's remaining bytes
 */
public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
    return new InputStream() {
        public int read() throws IOException {
            if (!buf.hasRemaining()) {
                return -1;
            }
            return buf.get() & 0xff;
        }

        public int read(byte[] bytes, int off, int len) throws IOException {
            if (len == 0) {
                // Per the InputStream contract, a zero-length request reads
                // nothing and returns 0, even at end of stream.
                return 0;
            }
            if (!buf.hasRemaining()) {
                // BUG FIX: end of stream must be signaled with -1. The
                // original returned 0 here, which violates the InputStream
                // contract and can send callers into an infinite read loop.
                return -1;
            }
            len = Math.min(len, buf.remaining());
            buf.get(bytes, off, len);
            return len;
        }
    };
}
/**
 * Initializes a RuleBasedCollator from a tailoring binary held in a ByteBuffer.
 * Small binaries carry only a header and options and reuse the shared UCA
 * tables; larger ones carry their own full table set.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param data
 *            collation binary, positioned at its start
 * @exception IOException
 *                thrown when the data fails authentication or is malformed
 */
static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
    // (42 + 25) ints: anything at or below this size is header + options only.
    final int minBinaryDataSize = (42 + 25) << 2;
    int length = data.remaining();
    // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
    // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
    // Consider changing ICUBinary to also work with a ByteBuffer.
    CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
    if (length <= minBinaryDataSize) {
        // Header/options only: duplicate the UCA's tables for this collator.
        reader.readHeader(rbc, null);
        reader.readOptions(rbc);
        rbc.setWithUCATables();
    } else {
        reader.readImp(rbc, null, null, null);
    }
}
/**
 * Reads the bundled inverse UCA data file (coll/invuca.icu).
 *
 * @return the parsed inverse UCA tables
 * @exception IOException
 *                thrown when the data file is missing or fails authentication
 */
static InverseUCA getInverseUCA() throws IOException {
    InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
    BufferedInputStream b = new BufferedInputStream(i, 110000);
    // try/finally so the stream is closed even when parsing throws
    // (the original leaked both streams on any IOException).
    try {
        return CollatorReader.readInverseUCA(b);
    } finally {
        // Closing the BufferedInputStream also closes the underlying stream.
        b.close();
    }
}
// protected constructor ---------------------------------------------
/**
 * Protected constructor. Reads and authenticates the ICU data-file header
 * before positioning the stream at the collation data.
 *
 * @param inputStream
 *            ICU collator file input stream
 * @exception IOException
 *                thrown if the data file fails authentication
 */
private CollatorReader(InputStream inputStream) throws IOException {
    // Header authentication is performed by the delegated constructor.
    // (A stale commented-out copy of that constructor's body was removed here.)
    this(inputStream, true);
}
/**
 * Protected constructor. Optionally reads and authenticates the ICU data-file
 * header, then wraps the stream for big-endian primitive reads.
 *
 * @param inputStream
 *            ICU collator data file input stream
 * @param readICUHeader
 *            flag to indicate if the ICU header has to be read
 * @exception IOException
 *                thrown if the data file fails authentication
 */
private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
    if (readICUHeader) {
        // Authenticates the data format id and version (via UCA_AUTHENTICATE_)
        // and returns the Unicode version the binary was built against.
        byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
        // weiv: check that we have the correct Unicode version in
        // binary files
        VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
        if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
            throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
    }
    // DataInputStream reads big-endian, matching the on-disk format.
    m_dataInputStream_ = new DataInputStream(inputStream);
}
// protected methods -------------------------------------------------
/**
 * Reads and breaks up the header stream of data passed in as arguments into meaningful Collator data.
 * <p>
 * Side effects: populates this reader's size fields (m_size_, m_headerSize_, m_optionSize_ and the
 * table-size fields derived from consecutive offsets) and the collator's offsets, flags and versions.
 * The reads below are strictly ordered to match the on-disk layout; readcount tracks bytes consumed
 * so the remainder of the header can be skipped.
 *
 * @param rbc
 *            RuleBasedCollator to populate with header information
 * @param maxUCAContractionLength
 *            if non-null, receives the UCA contraction combo width read from the header
 * @exception IOException
 *                thrown when there's a data error.
 */
private void readHeader(RuleBasedCollator rbc, Output maxUCAContractionLength) throws IOException {
    m_size_ = m_dataInputStream_.readInt();
    // all the offsets are in bytes
    // to get the address add to the header address and cast properly
    // Default options int options
    m_headerSize_ = m_dataInputStream_.readInt(); // start of options
    int readcount = 8; // for size and headersize
    // structure which holds values for indirect positioning and implicit
    // ranges
    m_UCAConstOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // this one is needed only for UCA, to copy the appropriate
    // contractions
    /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
    readcount += 4;
    // reserved for future use
    m_dataInputStream_.skipBytes(4);
    readcount += 4;
    // const uint8_t *mappingPosition;
    int mapping = m_dataInputStream_.readInt();
    readcount += 4;
    // uint32_t *expansion;
    rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // UChar *contractionIndex;
    rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
    readcount += 4;
    // uint32_t *contractionCEs;
    int contractionCE = m_dataInputStream_.readInt();
    readcount += 4;
    // needed for various closures int contractionSize
    int contractionSize = m_dataInputStream_.readInt();
    readcount += 4;
    // array of last collation element in expansion
    int expansionEndCE = m_dataInputStream_.readInt();
    readcount += 4;
    // array of maximum expansion size corresponding to the expansion
    // collation elements with last element in expansionEndCE
    int expansionEndCEMaxSize = m_dataInputStream_.readInt();
    readcount += 4;
    // size of endExpansionCE int expansionEndCESize
    /* int endExpansionCECount = */m_dataInputStream_.readInt();
    readcount += 4;
    // hash table of unsafe code points
    int unsafe = m_dataInputStream_.readInt();
    readcount += 4;
    // hash table of final code points in contractions.
    int contractionEnd = m_dataInputStream_.readInt();
    readcount += 4;
    // int CEcount = m_dataInputStream_.readInt();
    int contractionUCACombosSize = m_dataInputStream_.readInt();
    readcount += 4;
    // is jamoSpecial
    rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
    readcount++;
    // isBigEndian and charSetFamily
    m_dataInputStream_.skipBytes(2);
    readcount += 2;
    // width (in UChars) of each UCA contraction combo entry
    int contractionUCACombosWidth = m_dataInputStream_.readByte();
    if (maxUCAContractionLength != null) {
        maxUCAContractionLength.value = contractionUCACombosWidth;
    }
    // We want to be able to output this value if it's not 0.
    assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null;
    readcount += 1;
    rbc.m_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
    readcount += 4;
    /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
    readcount += 4;
    rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
    readcount += 4;
    // byte charsetName[] = new byte[32]; // for charset CEs
    m_dataInputStream_.skipBytes(32);
    readcount += 32;
    m_dataInputStream_.skipBytes(44); // for future use
    readcount += 44;
    if (m_headerSize_ < readcount) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Header size error");
        // /CLOVER:ON
    }
    // skip any trailing header bytes this version does not understand
    m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
    if (rbc.m_contractionOffset_ == 0) { // contraction can be null
        rbc.m_contractionOffset_ = mapping;
        contractionCE = mapping;
    }
    // table sizes are differences between consecutive byte offsets
    m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
    m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
    m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
    m_contractionCESize_ = mapping - contractionCE;
    // m_trieSize_ = expansionEndCE - mapping;
    m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
    m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
    m_unsafeSize_ = contractionEnd - unsafe;
    // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
    m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
    // treat it as normal collator first
    // for normal collator there is no UCA contraction
    // contractions (UChar[contractionSize] + CE[contractionSize])
    m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
    rbc.m_contractionOffset_ >>= 1; // casting to ints
    rbc.m_expansionOffset_ >>= 2; // casting to chars
}
/**
 * Reads and breaks up the collation options passed in the stream of data and updates the argument
 * Collator with the results. Reads 10 ints of option values plus 60 reserved bytes, then skips the
 * remainder of the option table.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @exception IOException
 *                thrown when there's a data error.
 */
private void readOptions(RuleBasedCollator rbc) throws IOException {
    int readcount = 0;
    rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
    readcount += 4;
    rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
    readcount += 4;
    int defaultIsCaseLevel = m_dataInputStream_.readInt();
    rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    int value = m_dataInputStream_.readInt();
    readcount += 4;
    if (value == RuleBasedCollator.AttributeValue.ON_) {
        value = Collator.CANONICAL_DECOMPOSITION;
    } else {
        value = Collator.NO_DECOMPOSITION;
    }
    rbc.m_defaultDecomposition_ = value;
    rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
    readcount += 4;
    rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
    readcount += 4;
    m_dataInputStream_.skip(60); // reserved for future use
    readcount += 60;
    // FIX: validate the option-table size BEFORE skipping the remainder.
    // The original skipped first; skipBytes() silently ignores a negative
    // count, so the check still fired, but checking first matches the order
    // used in readHeader() and does not rely on that quirk.
    if (m_optionSize_ < readcount) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Option size error");
        // /CLOVER:ON
    }
    m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
}
/**
 * Reads and breaks up the stream of data passed in as arguments into meaningful Collator data.
 * Must be called with the stream positioned immediately after the ICU file header; the reads
 * below are strictly ordered to match the on-disk layout, and readcount is reconciled against
 * m_size_ at the end.
 *
 * @param rbc
 *            RuleBasedCollator to populate
 * @param UCAConst
 *            object to fill up with UCA constants if we are reading the UCA collator, if not use a null
 * @param leadByteConstants
 *            if non-null, receives the script-to-lead-byte tables
 * @param maxUCAContractionLength
 *            if non-null, receives the maximum UCA contraction length from the header
 * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
 * @exception IOException
 *                thrown when there's a data error.
 */
private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
        RuleBasedCollator.LeadByteConstants leadByteConstants,
        Output maxUCAContractionLength) throws IOException {
    char ucaContractions[] = null; // return result
    readHeader(rbc, maxUCAContractionLength);
    // header size has been checked by readHeader
    int readcount = m_headerSize_;
    // option size has been checked by readOptions
    readOptions(rbc);
    readcount += m_optionSize_;
    // expansion table: byte size -> int count
    m_expansionSize_ >>= 2;
    rbc.m_expansion_ = new int[m_expansionSize_];
    for (int i = 0; i < m_expansionSize_; i++) {
        rbc.m_expansion_[i] = m_dataInputStream_.readInt();
    }
    readcount += (m_expansionSize_ << 2);
    if (m_contractionIndexSize_ > 0) {
        // contraction index: byte size -> char count
        m_contractionIndexSize_ >>= 1;
        rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
        for (int i = 0; i < m_contractionIndexSize_; i++) {
            rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
        }
        readcount += (m_contractionIndexSize_ << 1);
        // contraction CEs: byte size -> int count
        m_contractionCESize_ >>= 2;
        rbc.m_contractionCE_ = new int[m_contractionCESize_];
        for (int i = 0; i < m_contractionCESize_; i++) {
            rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_contractionCESize_ << 2);
    }
    rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
    if (!rbc.m_trie_.isLatin1Linear()) {
        throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
                + "latin one data arrays");
    }
    readcount += rbc.m_trie_.getSerializedDataSize();
    // expansion-end CEs: byte size -> int count
    m_expansionEndCESize_ >>= 2;
    rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
    for (int i = 0; i < m_expansionEndCESize_; i++) {
        rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
    }
    readcount += (m_expansionEndCESize_ << 2);
    rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
    for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
        rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_expansionEndCEMaxSizeSize_;
    rbc.m_unsafe_ = new byte[m_unsafeSize_];
    for (int i = 0; i < m_unsafeSize_; i++) {
        rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_unsafeSize_;
    if (UCAConst != null) {
        // we are reading the UCA
        // unfortunately the UCA offset in any collator data is not 0 and
        // only refers to the UCA data
        // m_contractionSize_ -= m_UCAValuesSize_;
        m_contractionSize_ = m_UCAConstOffset_ - readcount;
    } else {
        m_contractionSize_ = m_size_ - readcount;
    }
    rbc.m_contractionEnd_ = new byte[m_contractionSize_];
    for (int i = 0; i < m_contractionSize_; i++) {
        rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
    }
    readcount += m_contractionSize_;
    if (UCAConst != null) {
        // UCA constants: pairs of CEs ([0]=primary CE, [1]=continuation)
        // followed by single primary-weight boundaries, in fixed order.
        UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        int readUCAConstcount = 4;
        UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
        readUCAConstcount += 4;
        readcount += readUCAConstcount;
        // remaining chars up to the script-to-lead-byte table are the
        // UCA contractions; size must agree with the header's combo data
        int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
        assert resultsize == m_UCAcontractionSize_ / 2;
        ucaContractions = new char[resultsize];
        for (int i = 0; i < resultsize; i++) {
            ucaContractions[i] = m_dataInputStream_.readChar();
        }
        readcount += m_UCAcontractionSize_;
    }
    if (leadByteConstants != null) {
        // skip any gap before the script/lead-byte tables, then read them
        readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
        leadByteConstants.read(m_dataInputStream_);
        readcount += leadByteConstants.getSerializedDataSize();
    }
    if (readcount != m_size_) {
        // /CLOVER:OFF
        throw new IOException("Internal Error: Data file size error");
        // /CLOVER:ON
    }
    return ucaContractions;
}
/**
 * Reads in the inverse uca data. Authenticates the ICU header, checks the
 * Unicode version, then reads the fixed-layout table and continuation arrays.
 *
 * @param inputStream
 *            input stream with the inverse uca data
 * @return an object containing the inverse uca data
 * @exception IOException
 *                thrown when error occurs while reading the inverse uca
 */
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
    byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
            INVERSE_UCA_AUTHENTICATE_);
    // weiv: check that we have the correct Unicode version in
    // binary files
    VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
    if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
        throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
    }
    CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
    DataInputStream input = new DataInputStream(inputStream);
    input.readInt(); // bytesize
    int tablesize = input.readInt(); // in int size
    int contsize = input.readInt(); // in char size
    input.readInt(); // table in bytes
    input.readInt(); // conts in bytes
    result.m_UCA_version_ = readVersion(input);
    input.skipBytes(8); // skip padding
    int size = tablesize * 3; // one column for each strength
    result.m_table_ = new int[size];
    result.m_continuations_ = new char[contsize];
    for (int i = 0; i < size; i++) {
        result.m_table_[i] = input.readInt();
    }
    for (int i = 0; i < contsize; i++) {
        result.m_continuations_[i] = input.readChar();
    }
    // also closes the wrapped inputStream
    input.close();
    return result;
}
/**
 * Reads four bytes from the input and returns a VersionInfo object. Use it to read different
 * collator versions.
 *
 * @param input
 *            already instantiated DataInputStream, positioned at the start of four version bytes
 * @return a ready VersionInfo object
 * @throws IOException
 *             thrown when error occurs while reading version bytes
 */
protected static VersionInfo readVersion(DataInputStream input) throws IOException {
    // Read the four version bytes (major, minor, milli, micro) in stream order.
    byte[] v = new byte[4];
    input.readFully(v);
    return VersionInfo.getInstance(v[0], v[1], v[2], v[3]);
}
// private inner class -----------------------------------------------
// private variables -------------------------------------------------
/**
 * Authenticates the uca data format version: major version must match exactly,
 * minor version must be at least the expected one.
 */
private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
    public boolean isDataVersionAcceptable(byte version[]) {
        return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1];
        // Too harsh
        // && version[1] == DATA_FORMAT_VERSION_[1]
        // && version[2] == DATA_FORMAT_VERSION_[2]
        // && version[3] == DATA_FORMAT_VERSION_[3];
    }
};
/**
 * Authenticates the inverse uca data format version (same policy as
 * UCA_AUTHENTICATE_: exact major, minimum minor).
 */
private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
    public boolean isDataVersionAcceptable(byte version[]) {
        return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
    }
};
/**
 * Data input stream for uca.icu (big-endian primitive reads)
 */
private DataInputStream m_dataInputStream_;
/**
 * File format version and id that this class understands. No guarantees are made if an older
 * version is used.
 */
private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
// "UCol" in ASCII
private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
/**
 * Inverse UCA file format version and id that this class understands. No guarantees are made if
 * an older version is used.
 */
private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
// "InvC" in ASCII
private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
/**
 * Wrong unicode version error string
 */
private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
/**
 * Size of expansion table in bytes
 */
private int m_expansionSize_;
/**
 * Size of contraction index table in bytes
 */
private int m_contractionIndexSize_;
/**
 * Size of contraction table in bytes
 */
private int m_contractionCESize_;
/*
 * Size of the Trie in bytes
 */
// private int m_trieSize_;
/**
 * Size of the table that contains information about collation elements that end with an expansion
 */
private int m_expansionEndCESize_;
/**
 * Size of the table that contains information about the maximum size of collation elements that end with a
 * particular expansion CE corresponding to the ones in expansionEndCE
 */
private int m_expansionEndCEMaxSizeSize_;
/**
 * Size of the option table that contains information about the collation options
 */
private int m_optionSize_;
/**
 * Size of the whole data file minus the ICU header
 */
private int m_size_;
/**
 * Size of the collation data header
 */
private int m_headerSize_;
/**
 * Size of the table that contains information about the "Unsafe" codepoints
 */
private int m_unsafeSize_;
/**
 * Size in bytes of the table that contains information about codepoints that end with a contraction
 */
private int m_contractionSize_;
/**
 * Size of the table that contains UCA contraction information in bytes
 */
private int m_UCAcontractionSize_;
/**
 * Offset (in bytes) of the UCA constants block within the data
 */
private int m_UCAConstOffset_;
// private methods ---------------------------------------------------
}