com.ibm.icu.impl.PropsVectors Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
/**
* Store bits (Unicode character properties) in bit set vectors.
*
* This is a port of the C++ class UPropsVectors from ICU4C
*
* @author Shaopeng Jia
* @internal
*/
package com.ibm.icu.impl;
import java.util.Arrays;
import java.util.Comparator;
/**
* Unicode Properties Vectors associated with code point ranges.
*
* Rows of primitive integers in a contiguous array store the range limits and
* the properties vectors.
*
* In each row, row[0] contains the start code point and row[1] contains the
* limit code point, which is the start of the next range.
*
* Initially, there is only one range [0..0x110000] with values 0.
*
* It would be possible to store only one range boundary per row, but
* self-contained rows allow to later sort them by contents.
*/
public class PropsVectors {
private int v[];
private int columns; // number of columns, plus two for start
// and limit values
private int maxRows;
private int rows;
private int prevRow; // search optimization: remember last row seen
private boolean isCompacted;
// internal function to compare elements in v and target. Return true iff
// elements in v starting from index1 to index1 + length - 1
// are exactly the same as elements in target
// starting from index2 to index2 + length - 1
private boolean areElementsSame(int index1, int[] target, int index2,
int length) {
for (int i = 0; i < length; ++i) {
if (v[index1 + i] != target[index2 + i]) {
return false;
}
}
return true;
}
// internal function which given rangeStart, returns
// index where v[index]<=rangeStart= v[index]) {
if (rangeStart < v[index + 1]) {
// same row as last seen
return index;
} else {
index += columns;
if (rangeStart < v[index + 1]) {
++prevRow;
return index;
} else {
index += columns;
if (rangeStart < v[index + 1]) {
prevRow += 2;
return index;
} else if ((rangeStart - v[index + 1]) < 10) {
// we are close, continue looping
prevRow += 2;
do {
++prevRow;
index += columns;
} while (rangeStart >= v[index + 1]);
return index;
}
}
}
} else if (rangeStart < v[1]) {
// the very first row
prevRow = 0;
return 0;
}
// do a binary search for the start of the range
int start = 0;
int mid = 0;
int limit = rows;
while (start < limit - 1) {
mid = (start + limit) / 2;
index = columns * mid;
if (rangeStart < v[index]) {
limit = mid;
} else if (rangeStart < v[index + 1]) {
prevRow = mid;
return index;
} else {
start = mid;
}
}
// must be found because all ranges together always cover
// all of Unicode
prevRow = start;
index = start * columns;
return index;
}
/*
* Special pseudo code points for storing the initialValue and the
* errorValue which are used to initialize a Trie or similar.
*/
public final static int FIRST_SPECIAL_CP = 0x110000;
public final static int INITIAL_VALUE_CP = 0x110000;
public final static int ERROR_VALUE_CP = 0x110001;
public final static int MAX_CP = 0x110001;
public final static int INITIAL_ROWS = 1 << 12;
public final static int MEDIUM_ROWS = 1 << 16;
public final static int MAX_ROWS = MAX_CP + 1;
/*
* Constructor.
* @param numOfColumns Number of value integers (32-bit int) per row.
*/
public PropsVectors(int numOfColumns) {
if (numOfColumns < 1) {
throw new IllegalArgumentException("numOfColumns need to be no "
+ "less than 1; but it is " + numOfColumns);
}
columns = numOfColumns + 2; // count range start and limit columns
v = new int[INITIAL_ROWS * columns];
maxRows = INITIAL_ROWS;
rows = 2 + (MAX_CP - FIRST_SPECIAL_CP);
prevRow = 0;
isCompacted = false;
v[0] = 0;
v[1] = 0x110000;
int index = columns;
for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) {
v[index] = cp;
v[index + 1] = cp + 1;
index += columns;
}
}
/*
* In rows for code points [start..end], select the column, reset the mask
* bits and set the value bits (ANDed with the mask).
*
* @throws IllegalArgumentException
*
* @throws IllegalStateException
*
* @throws IndexOutOfBoundsException
*/
public void setValue(int start, int end, int column, int value, int mask) {
if (start < 0 || start > end || end > MAX_CP || column < 0
|| column >= (columns - 2)) {
throw new IllegalArgumentException();
}
if (isCompacted) {
throw new IllegalStateException("Shouldn't be called after"
+ "compact()!");
}
int firstRow, lastRow;
int limit = end + 1;
boolean splitFirstRow, splitLastRow;
// skip range start and limit columns
column += 2;
value &= mask;
// find the rows whose ranges overlap with the input range
// find the first and last row, always successful
firstRow = findRow(start);
lastRow = findRow(end);
/*
* Rows need to be split if they partially overlap with the input range
* (only possible for the first and last rows) and if their value
* differs from the input value.
*/
splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask));
splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask));
// split first/last rows if necessary
if (splitFirstRow || splitLastRow) {
int rowsToExpand = 0;
if (splitFirstRow) {
++rowsToExpand;
}
if (splitLastRow) {
++rowsToExpand;
}
int newMaxRows = 0;
if ((rows + rowsToExpand) > maxRows) {
if (maxRows < MEDIUM_ROWS) {
newMaxRows = MEDIUM_ROWS;
} else if (maxRows < MAX_ROWS) {
newMaxRows = MAX_ROWS;
} else {
throw new IndexOutOfBoundsException(
"MAX_ROWS exceeded! Increase it to a higher value" +
"in the implementation");
}
int[] temp = new int[newMaxRows * columns];
System.arraycopy(v, 0, temp, 0, rows * columns);
v = temp;
maxRows = newMaxRows;
}
// count the number of row cells to move after the last row,
// and move them
int count = (rows * columns) - (lastRow + columns);
if (count > 0) {
System.arraycopy(v, lastRow + columns, v, lastRow
+ (1 + rowsToExpand) * columns, count);
}
rows += rowsToExpand;
// split the first row, and move the firstRow pointer
// to the second part
if (splitFirstRow) {
// copy all affected rows up one and move the lastRow pointer
count = lastRow - firstRow + columns;
System.arraycopy(v, firstRow, v, firstRow + columns, count);
lastRow += columns;
// split the range and move the firstRow pointer
v[firstRow + 1] = v[firstRow + columns] = start;
firstRow += columns;
}
// split the last row
if (splitLastRow) {
// copy the last row data
System.arraycopy(v, lastRow, v, lastRow + columns, columns);
// split the range and move the firstRow pointer
v[lastRow + 1] = v[lastRow + columns] = limit;
}
}
// set the "row last seen" to the last row for the range
prevRow = lastRow / columns;
// set the input value in all remaining rows
firstRow += column;
lastRow += column;
mask = ~mask;
for (;;) {
v[firstRow] = (v[firstRow] & mask) | value;
if (firstRow == lastRow) {
break;
}
firstRow += columns;
}
}
/*
* Always returns 0 if called after compact().
*/
public int getValue(int c, int column) {
if (isCompacted || c < 0 || c > MAX_CP || column < 0
|| column >= (columns - 2)) {
return 0;
}
int index = findRow(c);
return v[index + 2 + column];
}
/*
* Returns an array which contains value elements
* in row rowIndex.
*
* @throws IllegalStateException
* @throws IllegalArgumentException
*/
public int[] getRow(int rowIndex) {
if (isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method after compact()");
}
if (rowIndex < 0 || rowIndex > rows) {
throw new IllegalArgumentException("rowIndex out of bound!");
}
int[] rowToReturn = new int[columns - 2];
System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0,
columns - 2);
return rowToReturn;
}
/*
* Returns an int which is the start codepoint
* in row rowIndex.
*
* @throws IllegalStateException
*
* @throws IllegalArgumentException
*/
public int getRowStart(int rowIndex) {
if (isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method after compact()");
}
if (rowIndex < 0 || rowIndex > rows) {
throw new IllegalArgumentException("rowIndex out of bound!");
}
return v[rowIndex * columns];
}
/*
* Returns an int which is the limit codepoint
* minus 1 in row rowIndex.
*
* @throws IllegalStateException
*
* @throws IllegalArgumentException
*/
public int getRowEnd(int rowIndex) {
if (isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method after compact()");
}
if (rowIndex < 0 || rowIndex > rows) {
throw new IllegalArgumentException("rowIndex out of bound!");
}
return v[rowIndex * columns + 1] - 1;
}
/*
* Compact the vectors:
* - modify the memory
* - keep only unique vectors
* - store them contiguously from the beginning of the memory
* - for each (non-unique) row, call the respective function in
* CompactHandler
*
* The handler's rowIndex is the index of the row in the compacted
* memory block. Therefore, it starts at 0 increases in increments of the
* columns value.
*
* In a first phase, only special values are delivered (each exactly once).
* Then CompactHandler::startRealValues() is called
* where rowIndex is the length of the compacted array.
* Then, in the second phase, the CompactHandler::setRowIndexForRange() is
* called for each row of real values.
*/
public void compact(CompactHandler compactor) {
if (isCompacted) {
return;
}
// Set the flag now: Sorting and compacting destroys the builder
// data structure.
isCompacted = true;
int valueColumns = columns - 2; // not counting start & limit
// sort the properties vectors to find unique vector values
Integer[] indexArray = new Integer[rows];
for (int i = 0; i < rows; ++i) {
indexArray[i] = columns * i;
}
Arrays.sort(indexArray, new Comparator() {
@Override
public int compare(Integer o1, Integer o2) {
int indexOfRow1 = o1.intValue();
int indexOfRow2 = o2.intValue();
int count = columns; // includes start/limit columns
// start comparing after start/limit
// but wrap around to them
int index = 2;
do {
if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) {
return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1
: 1;
}
if (++index == columns) {
index = 0;
}
} while (--count > 0);
return 0;
}
});
/*
* Find and set the special values. This has to do almost the same work
* as the compaction below, to find the indexes where the special-value
* rows will move.
*/
int count = -valueColumns;
for (int i = 0; i < rows; ++i) {
int start = v[indexArray[i].intValue()];
// count a new values vector if it is different
// from the current one
if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v,
indexArray[i-1].intValue() + 2, valueColumns)) {
count += valueColumns;
}
if (start == INITIAL_VALUE_CP) {
compactor.setRowIndexForInitialValue(count);
} else if (start == ERROR_VALUE_CP) {
compactor.setRowIndexForErrorValue(count);
}
}
// count is at the beginning of the last vector,
// add valueColumns to include that last vector
count += valueColumns;
// Call the handler once more to signal the start of
// delivering real values.
compactor.startRealValues(count);
/*
* Move vector contents up to a contiguous array with only unique
* vector values, and call the handler function for each vector.
*
* This destroys the Properties Vector structure and replaces it
* with an array of just vector values.
*/
int[] temp = new int[count];
count = -valueColumns;
for (int i = 0; i < rows; ++i) {
int start = v[indexArray[i].intValue()];
int limit = v[indexArray[i].intValue() + 1];
// count a new values vector if it is different
// from the current one
if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
temp, count, valueColumns)) {
count += valueColumns;
System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
valueColumns);
}
if (start < FIRST_SPECIAL_CP) {
compactor.setRowIndexForRange(start, limit - 1, count);
}
}
v = temp;
// count is at the beginning of the last vector,
// add one to include that last vector
rows = count / valueColumns + 1;
}
/*
* Get the vectors array after calling compact().
*
* @throws IllegalStateException
*/
public int[] getCompactedArray() {
if (!isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method before compact()");
}
return v;
}
/*
* Get the number of rows for the compacted array.
*
* @throws IllegalStateException
*/
public int getCompactedRows() {
if (!isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method before compact()");
}
return rows;
}
/*
* Get the number of columns for the compacted array.
*
* @throws IllegalStateException
*/
public int getCompactedColumns() {
if (!isCompacted) {
throw new IllegalStateException(
"Illegal Invocation of the method before compact()");
}
return columns - 2;
}
/*
* Call compact(), create a IntTrie with indexes into the compacted
* vectors array.
*/
public IntTrie compactToTrieWithRowIndexes() {
PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler();
compact(compactor);
return compactor.builder.serialize(new DefaultGetFoldedValue(
compactor.builder), new DefaultGetFoldingOffset());
}
// inner class implementation of Trie.DataManipulate
private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
@Override
public int getFoldingOffset(int value) {
return value;
}
}
// inner class implementation of TrieBuilder.DataManipulate
private static class DefaultGetFoldedValue implements
TrieBuilder.DataManipulate {
private IntTrieBuilder builder;
public DefaultGetFoldedValue(IntTrieBuilder inBuilder) {
builder = inBuilder;
}
@Override
public int getFoldedValue(int start, int offset) {
int initialValue = builder.m_initialValue_;
int limit = start + 0x400;
while (start < limit) {
boolean[] inBlockZero = new boolean[1];
int value = builder.getValue(start, inBlockZero);
if (inBlockZero[0]) {
start += TrieBuilder.DATA_BLOCK_LENGTH;
} else if (value != initialValue) {
return offset;
} else {
++start;
}
}
return 0;
}
}
public static interface CompactHandler {
public void setRowIndexForRange(int start, int end, int rowIndex);
public void setRowIndexForInitialValue(int rowIndex);
public void setRowIndexForErrorValue(int rowIndex);
public void startRealValues(int rowIndex);
}
}