All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.impl.CharacterPropertiesImpl Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.UnicodeSet;

/**
 * Property helpers layered above class UCharacterProperty
 * but below class CharacterProperties and class UnicodeSet.
 *
 * <p>The class lazily builds and caches "inclusions" sets: one per property
 * data source and one per integer-valued property. The public entry points
 * are synchronized on the class; the private helpers are not synchronized
 * themselves and, within this file, are only reached through
 * {@link #getInclusionsForProperty}.
 */
public final class CharacterPropertiesImpl {
    /**
     * Number of cache slots: one for each property source, followed by
     * one for each property in the range [INT_START, INT_LIMIT).
     */
    private static final int NUM_INCLUSIONS =
            UCharacterProperty.SRC_COUNT + UProperty.INT_LIMIT - UProperty.INT_START;

    /**
     * Lazily filled cache of inclusions sets. Slots below
     * {@code UCharacterProperty.SRC_COUNT} are indexed by source; the
     * remaining slots are indexed by {@code prop - UProperty.INT_START}.
     *
     * <p>Each entry is a set of all characters _except_ the second through
     * last characters of certain ranges. These ranges are ranges of
     * characters whose properties are all exactly alike, e.g. CJK Ideographs
     * from U+4E00 to U+9FA5.
     */
    private static final UnicodeSet[] inclusions = new UnicodeSet[NUM_INCLUSIONS];

    /**
     * Drops every cached inclusions set so that each is rebuilt on next use.
     * For {@link UnicodeSet#setDefaultXSymbolTable}.
     */
    public static synchronized void clear() {
        java.util.Arrays.fill(inclusions, null);
    }

    /**
     * Returns the cached inclusions set for a property source, building and
     * caching it on first request.
     *
     * @param src one of the {@code UCharacterProperty.SRC_*} constants
     * @return the cached (mutable, unfrozen) set for {@code src}
     * @throws IllegalStateException if {@code src} is not a handled source
     */
    private static UnicodeSet getInclusionsForSource(int src) {
        UnicodeSet cached = inclusions[src];
        if (cached != null) {
            return cached;
        }

        UnicodeSet starts = new UnicodeSet();
        addPropertyStartsForSource(src, starts);

        // The set is deliberately not frozen: it is only iterated over,
        // never queried with contains(), so the extra time and memory
        // that freeze() spends on optimizing lookups would be wasted.
        UnicodeSet compacted = starts.compact();
        inclusions[src] = compacted;
        return compacted;
    }

    /**
     * Adds the property start code points of the given source to {@code starts}
     * by delegating to the data class(es) that back that source.
     *
     * @param src one of the {@code UCharacterProperty.SRC_*} constants
     * @param starts the set that receives the start code points
     * @throws IllegalStateException if {@code src} is not a handled source
     */
    private static void addPropertyStartsForSource(int src, UnicodeSet starts) {
        switch (src) {
        case UCharacterProperty.SRC_CHAR:
            UCharacterProperty.INSTANCE.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_PROPSVEC:
            UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
            UCharacterProperty.INSTANCE.addPropertyStarts(starts);
            UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_CASE_AND_NORM:
            Norm2AllModes.getNFCInstance().impl.addPropertyStarts(starts);
            UCaseProps.INSTANCE.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_NFC:
            Norm2AllModes.getNFCInstance().impl.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_NFKC:
            Norm2AllModes.getNFKCInstance().impl.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_NFKC_CF:
            Norm2AllModes.getNFKC_CFInstance().impl.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_NFC_CANON_ITER:
            Norm2AllModes.getNFCInstance().impl.addCanonIterPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_CASE:
            UCaseProps.INSTANCE.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_BIDI:
            UBiDiProps.INSTANCE.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_INPC:
        case UCharacterProperty.SRC_INSC:
        case UCharacterProperty.SRC_VO:
            // These three sources share one helper, which is told which one to use.
            UCharacterProperty.ulayout_addPropertyStarts(src, starts);
            return;
        case UCharacterProperty.SRC_EMOJI:
            EmojiProps.INSTANCE.addPropertyStarts(starts);
            return;
        case UCharacterProperty.SRC_IDSU:
            // New in Unicode 15.1 for just two characters:
            // the range starts at U+2FFE and the next range starts after U+2FFF.
            starts.add(0x2FFE);
            starts.add(0x2FFF + 1);
            return;
        case UCharacterProperty.SRC_ID_COMPAT_MATH:
            UCharacterProperty.mathCompat_addPropertyStarts(starts);
            return;
        default:
            throw new IllegalStateException("getInclusions(unknown src " + src + ")");
        }
    }

    /**
     * Returns the cached inclusions set for an integer-valued property,
     * building and caching it on first request.
     *
     * <p>The set is derived from the inclusions of the property's source:
     * every code point in those inclusions is visited in order, and a code
     * point is recorded whenever its property value differs from the value
     * recorded most recently.
     *
     * @param prop a property with {@code UProperty.INT_START <= prop < UProperty.INT_LIMIT}
     * @return the cached (mutable, unfrozen) set for {@code prop}
     */
    private static UnicodeSet getIntPropInclusions(int prop) {
        assert(UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT);
        final int slot = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START;
        UnicodeSet cached = inclusions[slot];
        if (cached != null) {
            return cached;
        }

        UnicodeSet sourceStarts =
                getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));

        // Seeded with the range 0..0; the running value starts at 0 to match.
        UnicodeSet valueStarts = new UnicodeSet(0, 0);
        int lastValue = 0;
        final int rangeCount = sourceStarts.getRangeCount();
        for (int r = 0; r < rangeCount; ++r) {
            final int last = sourceStarts.getRangeEnd(r);
            int cp = sourceStarts.getRangeStart(r);
            while (cp <= last) {
                // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
                int current = UCharacter.getIntPropertyValue(cp, prop);
                if (current != lastValue) {
                    valueStarts.add(cp);
                    lastValue = current;
                }
                ++cp;
            }
        }

        // Compact before storing, since the set is kept for the cache's lifetime.
        UnicodeSet compacted = valueStarts.compact();
        inclusions[slot] = compacted;
        return compacted;
    }

    /**
     * Returns the inclusions set for a property: the per-property set for
     * properties in [INT_START, INT_LIMIT), otherwise the set of the
     * property's data source.
     *
     * <p>Returns a mutable UnicodeSet -- do not modify!
     *
     * @param prop a {@code UProperty} constant
     * @return the shared, cached inclusions set
     */
    public static synchronized UnicodeSet getInclusionsForProperty(int prop) {
        boolean isIntProperty = UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT;
        if (isIntProperty) {
            return getIntPropInclusions(prop);
        }
        return getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy