All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.impl.EmojiProps Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support.

There is a newer version: 76.1
Show newest version
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// emojiprops.h
// created: 2021sep06 Markus W. Scherer

package com.ibm.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;

import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.CharsTrie;
import com.ibm.icu.util.CodePointMap;
import com.ibm.icu.util.CodePointTrie;
import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Emoji property data loaded from the binary data file {@code uemoji.icu}.
 *
 * <p>The file contains an array of int indexes (byte offsets), a code point trie with
 * one bit per supported code point property, and up to six string tries (stored as
 * {@link CharsTrie} data) for the emoji properties of strings.
 *
 * <p>Use the shared {@link #INSTANCE}; the constructor is private.
 */
public final class EmojiProps {
    /** Accepts data whose first format version byte is 1. */
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        @Override
        public boolean isDataVersionAcceptable(byte[] version) {
            return version[0] == 1;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
    private static final int DATA_FORMAT = 0x456d6f6a;  // "Emoj"

    // Byte offsets from the start of the data, after the generic header,
    // in ascending order.
    // UCPTrie=CodePointTrie, follows the indexes
    private static final int IX_CPTRIE_OFFSET = 0;

    // UCharsTrie=CharsTrie
    // The string trie indexes are contiguous. Only the first and last are named here;
    // the ones in between are, in order:
    //   5 = EMOJI_KEYCAP_SEQUENCE
    //   6 = RGI_EMOJI_MODIFIER_SEQUENCE
    //   7 = RGI_EMOJI_FLAG_SEQUENCE
    //   8 = RGI_EMOJI_TAG_SEQUENCE
    private static final int IX_BASIC_EMOJI_TRIE_OFFSET = 4;
    private static final int IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET = 9;

    /**
     * Index of the offset that marks the end of the last string trie.
     * The constructor reads this index, so it must be present in the data.
     */
    private static final int IX_STRING_TRIES_LIMIT_OFFSET =
            IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET + 1;

    /** Number of string trie slots, one per IX_..._TRIE_OFFSET index. */
    private static final int STRING_TRIE_COUNT =
            IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET - IX_BASIC_EMOJI_TRIE_OFFSET + 1;

    // Properties in the code point trie.
    // https://www.unicode.org/reports/tr51/#Emoji_Properties
    private static final int BIT_EMOJI = 0;
    private static final int BIT_EMOJI_PRESENTATION = 1;
    private static final int BIT_EMOJI_MODIFIER = 2;
    private static final int BIT_EMOJI_MODIFIER_BASE = 3;
    private static final int BIT_EMOJI_COMPONENT = 4;
    private static final int BIT_EXTENDED_PICTOGRAPHIC = 5;
    // https://www.unicode.org/reports/tr51/#Emoji_Sets
    private static final int BIT_BASIC_EMOJI = 6;

    /** The shared instance; creating it loads {@code uemoji.icu}. */
    public static final EmojiProps INSTANCE = new EmojiProps();

    /** Maps each code point to a set of BIT_... property flags. */
    private final CodePointTrie.Fast8 cpTrie;
    /** CharsTrie data per string property; an element is null if the data has no such trie. */
    private final String[] stringTries = new String[STRING_TRIE_COUNT];

    /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
    private static int getStringTrieIndex(int i) {
        return i - IX_BASIC_EMOJI_TRIE_OFFSET;
    }

    /**
     * Loads and parses {@code uemoji.icu}: the indexes, then the code point trie,
     * then the string tries.
     *
     * @throws ICUUncheckedIOException if the data has too few indexes,
     *         or if reading the data fails with an IOException
     */
    private EmojiProps() {
        ByteBuffer bytes = ICUBinary.getRequiredData("uemoji.icu");
        try {
            ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
            int startPos = bytes.position();

            // The code point trie follows the indexes, so its byte offset
            // also tells us how many 4-byte indexes there are.
            int cpTrieOffset = bytes.getInt();  // inIndexes[IX_CPTRIE_OFFSET]
            int indexesLength = cpTrieOffset / 4;
            // The loop over the string tries reads inIndexes[i + 1] for the last trie,
            // so the limit index must exist too. Checking only for the last trie index
            // would let short data fail later with an ArrayIndexOutOfBoundsException.
            if (indexesLength <= IX_STRING_TRIES_LIMIT_OFFSET) {
                throw new ICUUncheckedIOException(
                        "Emoji properties data: not enough indexes");
            }

            int[] inIndexes = new int[indexesLength];
            inIndexes[IX_CPTRIE_OFFSET] = cpTrieOffset;
            for (int i = 1; i < indexesLength; ++i) {
                inIndexes[i] = bytes.getInt();
            }

            int nextOffset = inIndexes[IX_CPTRIE_OFFSET + 1];
            cpTrie = CodePointTrie.Fast8.fromBinary(bytes);
            int pos = bytes.position() - startPos;
            assert nextOffset >= pos;
            ICUBinary.skipBytes(bytes, nextOffset - pos);  // skip padding after trie bytes

            int offset = nextOffset;
            nextOffset = inIndexes[IX_BASIC_EMOJI_TRIE_OFFSET];
            ICUBinary.skipBytes(bytes, nextOffset - offset);  // skip unknown bytes

            for (int i = IX_BASIC_EMOJI_TRIE_OFFSET;
                    i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) {
                offset = inIndexes[i];
                nextOffset = inIndexes[i + 1];
                // Set/leave null if there is no CharsTrie.
                if (nextOffset > offset) {
                    // The offsets are in bytes; halved here, presumably because
                    // getString() takes its length in 16-bit chars.
                    stringTries[getStringTrieIndex(i)] =
                            ICUBinary.getString(bytes, (nextOffset - offset) / 2, 0);
                }
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Adds the start code point of each same-value range of the code point trie.
     *
     * @param set the set to add to
     * @return the same set
     */
    public UnicodeSet addPropertyStarts(UnicodeSet set) {
        CodePointMap.Range range = new CodePointMap.Range();
        int start = 0;
        while (cpTrie.getRange(start, null, range)) {
            set.add(start);
            start = range.getEnd() + 1;
        }
        return set;
    }

    // Indexed by (property - UProperty.EMOJI); -1 = not stored in the code point trie.
    // Note: REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
    private static final byte[] bitFlags = {
        BIT_EMOJI,                  // UCHAR_EMOJI=57
        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
        -1,                         // UCHAR_REGIONAL_INDICATOR=62
        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
    };

    /**
     * Tests a code point for one of the binary properties stored in the code point trie.
     *
     * @param c the code point
     * @param which a UProperty constant
     * @return true if the property's bit is set for c; false if it is not set,
     *         if which is outside UProperty.EMOJI..UProperty.RGI_EMOJI,
     *         or if which has no bit in the trie
     */
    public boolean hasBinaryProperty(int c, int which) {
        if (which < UProperty.EMOJI || UProperty.RGI_EMOJI < which) {
            return false;
        }
        int bit = bitFlags[which - UProperty.EMOJI];
        if (bit < 0) {
            return false;  // not a property that we support in this function
        }
        int bits = cpTrie.get(c);
        return ((bits >> bit) & 1) != 0;
    }

    /**
     * Tests a string for one of the emoji properties of strings by looking it up
     * in the corresponding string trie(s).
     *
     * @param s the string
     * @param which a UProperty constant
     * @return true if s has a value in a trie for which; false if it does not,
     *         if s is empty, or if which is outside
     *         UProperty.BASIC_EMOJI..UProperty.RGI_EMOJI
     */
    public boolean hasBinaryProperty(CharSequence s, int which) {
        int length = s.length();
        if (length == 0) { return false; }  // empty string
        // The caller should have delegated single code points to hasBinaryProperty(c, which).
        if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) {
            return false;
        }
        int firstProp = which;
        int lastProp = which;
        if (which == UProperty.RGI_EMOJI) {
            // RGI_Emoji is the union of the other emoji properties of strings.
            firstProp = UProperty.BASIC_EMOJI;
            lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE;
        }
        for (int prop = firstProp; prop <= lastProp; ++prop) {
            String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI];
            if (trieUChars != null) {
                CharsTrie trie = new CharsTrie(trieUChars, 0);
                BytesTrie.Result result = trie.next(s, 0, length);
                if (result.hasValue()) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Adds to the set every string stored in the string trie(s) for the given property.
     * Does nothing if which is outside UProperty.BASIC_EMOJI..UProperty.RGI_EMOJI.
     *
     * @param which a UProperty constant
     * @param set the set to add to
     */
    public void addStrings(int which, UnicodeSet set) {
        if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) {
            return;
        }
        int firstProp = which;
        int lastProp = which;
        if (which == UProperty.RGI_EMOJI) {
            // RGI_Emoji is the union of the other emoji properties of strings.
            firstProp = UProperty.BASIC_EMOJI;
            lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE;
        }
        for (int prop = firstProp; prop <= lastProp; ++prop) {
            String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI];
            if (trieUChars != null) {
                CharsTrie trie = new CharsTrie(trieUChars, 0);
                for (CharsTrie.Entry entry : trie) {
                    set.add(entry.chars);
                }
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy