All downloads are free. The search and download functionality uses the official Maven repository.

com.ibm.icu.lang.CharacterProperties Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

package com.ibm.icu.lang;

import com.ibm.icu.impl.CharacterPropertiesImpl;
import com.ibm.icu.impl.EmojiProps;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.CodePointMap;
import com.ibm.icu.util.CodePointTrie;
import com.ibm.icu.util.MutableCodePointTrie;

/**
 * Sets and maps for Unicode properties.
 * The methods here return an object per property:
 * A set for each ICU-supported binary property with all code points for which the property is true.
 * A map for each ICU-supported enumerated/catalog/int-valued property
 * which maps all Unicode code points to their values for that property.
 *
 * 

For details see the method descriptions. * For lookup of property values by code point see class {@link UCharacter}. * * @stable ICU 63 */ public final class CharacterProperties { private CharacterProperties() {} // all-static private static final UnicodeSet sets[] = new UnicodeSet[UProperty.BINARY_LIMIT]; private static final CodePointMap maps[] = new CodePointMap[UProperty.INT_LIMIT - UProperty.INT_START]; private static UnicodeSet makeSet(int property) { UnicodeSet set = new UnicodeSet(); if (UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI) { // property of strings EmojiProps.INSTANCE.addStrings(property, set); if (property != UProperty.BASIC_EMOJI && property != UProperty.RGI_EMOJI) { // property of _only_ strings return set.freeze(); } } UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property); int numRanges = inclusions.getRangeCount(); int startHasProperty = -1; for (int i = 0; i < numRanges; ++i) { int rangeEnd = inclusions.getRangeEnd(i); for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) { // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. if (UCharacter.hasBinaryProperty(c, property)) { if (startHasProperty < 0) { // Transition from false to true. startHasProperty = c; } } else if (startHasProperty >= 0) { // Transition from true to false. set.add(startHasProperty, c - 1); startHasProperty = -1; } } } if (startHasProperty >= 0) { set.add(startHasProperty, 0x10FFFF); } return set.freeze(); } private static CodePointMap makeMap(int property) { int nullValue = property == UProperty.SCRIPT ? 
UScript.UNKNOWN : 0; MutableCodePointTrie mutableTrie = new MutableCodePointTrie(nullValue, nullValue); UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property); int numRanges = inclusions.getRangeCount(); int start = 0; int value = nullValue; for (int i = 0; i < numRanges; ++i) { int rangeEnd = inclusions.getRangeEnd(i); for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) { // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. int nextValue = UCharacter.getIntPropertyValue(c, property); if (value != nextValue) { if (value != nullValue) { mutableTrie.setRange(start, c - 1, value); } start = c; value = nextValue; } } } if (value != 0) { mutableTrie.setRange(start, 0x10FFFF, value); } CodePointTrie.Type type; if (property == UProperty.BIDI_CLASS || property == UProperty.GENERAL_CATEGORY) { type = CodePointTrie.Type.FAST; } else { type = CodePointTrie.Type.SMALL; } CodePointTrie.ValueWidth valueWidth; // TODO: UCharacterProperty.IntProperty int max = UCharacter.getIntPropertyMaxValue(property); if (max <= 0xff) { valueWidth = CodePointTrie.ValueWidth.BITS_8; } else if (max <= 0xffff) { valueWidth = CodePointTrie.ValueWidth.BITS_16; } else { valueWidth = CodePointTrie.ValueWidth.BITS_32; } return mutableTrie.buildImmutable(type, valueWidth); } /** * Returns a frozen UnicodeSet for a binary property. * Throws an exception if the property number is not one for a binary property. * *

The returned set contains all code points for which the property is true. * * @param property {@link UProperty#BINARY_START}..{@link UProperty#BINARY_LIMIT}-1 * @return the property as a set * @see UProperty * @see UCharacter#hasBinaryProperty * @stable ICU 63 */ public static final UnicodeSet getBinaryPropertySet(int property) { if (property < 0 || UProperty.BINARY_LIMIT <= property) { throw new IllegalArgumentException("" + property + " is not a constant for a UProperty binary property"); } synchronized(sets) { UnicodeSet set = sets[property]; if (set == null) { sets[property] = set = makeSet(property); } return set; } } /** * Returns an immutable CodePointMap for an enumerated/catalog/int-valued property. * Throws an exception if the property number is not one for an "int property". * *

The returned object maps all Unicode code points to their values for that property. * For documentation of the integer values see {@link UCharacter#getIntPropertyValue(int, int)}. * *

The actual type of the returned object differs between properties * and may change over time. * * @param property {@link UProperty#INT_START}..{@link UProperty#INT_LIMIT}-1 * @return the property as a map * @see UProperty * @see UCharacter#getIntPropertyValue * @stable ICU 63 */ public static final CodePointMap getIntPropertyMap(int property) { if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) { throw new IllegalArgumentException("" + property + " is not a constant for a UProperty int property"); } synchronized(maps) { CodePointMap map = maps[property - UProperty.INT_START]; if (map == null) { maps[property - UProperty.INT_START] = map = makeMap(property); } return map; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy