All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.impl.IntTrie Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 ******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 ******************************************************************************
 */

package com.ibm.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;

import com.ibm.icu.text.UTF16;

/**
 * Trie implementation which stores data in int, 32 bits.
 * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32}
 * as long as that does not load ICU4C selector data.
 *
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */
public class IntTrie extends Trie
{
    // public constructors ---------------------------------------------

    /**
    * 

Creates a new Trie with the settings for the trie data.

*

Unserialize the 32-bit-aligned input stream and use the data for the * trie.

* @param bytes file buffer to a ICU data file, containing the trie * @param dataManipulate object which provides methods to parse the char * data * @throws IOException thrown when data reading fails */ public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate) throws IOException { super(bytes, dataManipulate); if (!isIntTrie()) { throw new IllegalArgumentException( "Data given does not belong to a int trie."); } } /** * Make a dummy IntTrie. * A dummy trie is an empty runtime trie, used when a real data trie cannot * be loaded. * * The trie always returns the initialValue, * or the leadUnitValue for lead surrogate code points. * The Latin-1 part is always set up to be linear. * * @param initialValue the initial value that is set for all code points * @param leadUnitValue the value for lead surrogate code _units_ that do not * have associated supplementary data * @param dataManipulate object which provides methods to parse the char data */ @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770 public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); int dataLength, latin1Length, i, limit; char block; /* calculate the actual size of the dummy trie data */ /* max(Latin-1, block 0) */ dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 
256 : DATA_BLOCK_LENGTH; if(leadUnitValue!=initialValue) { dataLength+=DATA_BLOCK_LENGTH; } m_data_=new int[dataLength]; m_dataLength_=dataLength; m_initialValue_=initialValue; /* fill the index and data arrays */ /* indexes are preset to 0 (block 0) */ /* Latin-1 data */ for(i=0; i>INDEX_STAGE_2_SHIFT_); i=0xd800>>INDEX_STAGE_1_SHIFT_; limit=0xdc00>>INDEX_STAGE_1_SHIFT_; for(; i> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); return (offset >= 0) ? m_data_[offset] : m_initialValue_; } /** * Gets the value to the data which this lead surrogate character points * to. * Returned data may contain folding offset information for the next * trailing surrogate character. * This method does not guarantee correct results for trail surrogates. * @param ch lead surrogate character * @return data value */ public final int getLeadValue(char ch) { return m_data_[getLeadOffset(ch)]; } /** * Get the value associated with the BMP code point. * Lead surrogate code points are treated as normal code points, with * unfolded values that may differ from getLeadValue() results. * @param ch the input BMP code point * @return trie data value associated with the BMP codepoint */ public final int getBMPValue(char ch) { return m_data_[getBMPOffset(ch)]; } /** * Get the value associated with a pair of surrogates. 
* @param lead a lead surrogate * @param trail a trail surrogate */ public final int getSurrogateValue(char lead, char trail) { if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) { throw new IllegalArgumentException( "Argument characters do not form a supplementary character"); } // get fold position for the next trail surrogate int offset = getSurrogateOffset(lead, trail); // get the real data from the folded lead/trail units if (offset > 0) { return m_data_[offset]; } // return m_initialValue_ if there is an error return m_initialValue_; } /** * Get a value from a folding offset (from the value of a lead surrogate) * and a trail surrogate. * @param leadvalue the value of a lead surrogate that contains the * folding offset * @param trail surrogate * @return trie data value associated with the trail character */ public final int getTrailValue(int leadvalue, char trail) { if (m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } int offset = m_dataManipulate_.getFoldingOffset(leadvalue); if (offset > 0) { return m_data_[getRawOffset(offset, (char)(trail & SURROGATE_MASK_))]; } return m_initialValue_; } /** *

Gets the latin 1 fast path value.

*

Note this only works if latin 1 characters have their own linear * array.

* @param ch latin 1 characters * @return value associated with latin character */ public final int getLatin1LinearValue(char ch) { return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch]; } /** * Checks if the argument Trie has the same data as this Trie * @param other Trie to check * @return true if the argument Trie has the same data as this Trie, false * otherwise */ ///CLOVER:OFF @Override public boolean equals(Object other) { boolean result = super.equals(other); if (result && other instanceof IntTrie) { IntTrie othertrie = (IntTrie)other; if (m_initialValue_ != othertrie.m_initialValue_ || !Arrays.equals(m_data_, othertrie.m_data_)) { return false; } return true; } return false; } @Override public int hashCode() { assert false : "hashCode not designed"; return 42; } ///CLOVER:ON // protected methods ----------------------------------------------- /** *

Parses the input stream and stores its trie content into a index and * data array

* @param bytes data buffer containing trie data */ @Override protected final void unserialize(ByteBuffer bytes) { super.unserialize(bytes); // one used for initial value m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0); m_initialValue_ = m_data_[0]; } /** * Gets the offset to the data which the surrogate pair points to. * @param lead lead surrogate * @param trail trailing surrogate * @return offset to data */ @Override protected final int getSurrogateOffset(char lead, char trail) { if (m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } // get fold position for the next trail surrogate int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); // get the real data from the folded lead/trail units if (offset > 0) { return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); } // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return -1; } /** * Gets the value at the argument index. 
* For use internally in TrieIterator * @param index value at index will be retrieved * @return 32 bit value * @see com.ibm.icu.impl.TrieIterator */ @Override protected final int getValue(int index) { return m_data_[index]; } /** * Gets the default initial value * @return 32 bit value */ @Override protected final int getInitialValue() { return m_initialValue_; } // package private methods ----------------------------------------- /** * Internal constructor for builder use * @param index the index array to be slotted into this trie * @param data the data array to be slotted into this trie * @param initialvalue the initial value for this trie * @param options trie options to use * @param datamanipulate folding implementation */ IntTrie(char index[], int data[], int initialvalue, int options, DataManipulate datamanipulate) { super(index, options, datamanipulate); m_data_ = data; m_dataLength_ = m_data_.length; m_initialValue_ = initialvalue; } // private data members -------------------------------------------- /** * Default value */ private int m_initialValue_; /** * Array of char data */ private int m_data_[]; }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy