All downloads are FREE. Search and download functionality uses the official Maven repository.

com.ibm.icu.impl.breakiter.DictionaryData Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support.

There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2012-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package com.ibm.icu.impl.breakiter;

import java.io.IOException;
import java.nio.ByteBuffer;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.util.UResourceBundle;

/**
 * Constants for the binary dictionary data layout used by the break-iterator
 * code, together with a loader that turns a named dictionary resource into a
 * {@link DictionaryMatcher}.
 *
 * <p>This class only holds static members and cannot be instantiated.
 */
final class DictionaryData {
    // Static-only holder: no instances.
    private DictionaryData() { }

    // Values and bit masks for the trie-type word (indexes[IX_TRIE_TYPE]).
    public static final int TRIE_TYPE_BYTES = 0;
    public static final int TRIE_TYPE_UCHARS = 1;
    public static final int TRIE_TYPE_MASK = 7;
    public static final int TRIE_HAS_VALUES = 8;

    // Values and bit masks related to the transform word (indexes[IX_TRANSFORM]).
    public static final int TRANSFORM_NONE = 0;
    public static final int TRANSFORM_TYPE_OFFSET = 0x1000000;
    public static final int TRANSFORM_TYPE_MASK = 0x7f000000;
    public static final int TRANSFORM_OFFSET_MASK = 0x1fffff;

    // Slot positions inside the int[] index header that follows the ICU data header.
    public static final int IX_STRING_TRIE_OFFSET = 0;
    public static final int IX_RESERVED1_OFFSET = 1;
    public static final int IX_RESERVED2_OFFSET = 2;
    public static final int IX_TOTAL_SIZE = 3;
    public static final int IX_TRIE_TYPE = 4;
    public static final int IX_TRANSFORM = 5;
    public static final int IX_RESERVED6 = 6;
    public static final int IX_RESERVED7 = 7;
    public static final int IX_COUNT = 8;

    // The four bytes 0x44 0x69 0x63 0x74 are ASCII "Dict".
    private static final int DATA_FORMAT_ID = 0x44696374;

    /**
     * Loads the dictionary registered under {@code "dictionaries/" + dictType}
     * in the break-iterator resource bundle and wraps its trie data in a matcher.
     *
     * @param dictType key looked up below {@code dictionaries/} in the bundle
     * @return a {@code BytesDictionaryMatcher} for byte tries, a
     *         {@code CharsDictionaryMatcher} for UChar tries, or {@code null}
     *         when the trie type stored in the data is neither of those
     * @throws IOException declared for the underlying data-reading calls
     */
    public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
        // Resolve the dictionary's file name from the break-iterator bundle.
        ICUResourceBundle bundle =
                (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BRKITR_BASE_NAME);
        String resourceName = bundle.getStringWithFallback("dictionaries/" + dictType);
        String resourcePath = ICUData.ICU_BRKITR_NAME + '/' + resourceName;

        ByteBuffer buffer = ICUBinary.getRequiredData(resourcePath);
        ICUBinary.readHeader(buffer, DATA_FORMAT_ID, null);

        // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
        int[] header = new int[IX_COUNT];
        for (int slot = 0; slot < header.length; slot++) {
            header[slot] = buffer.getInt();
        }

        // The trie must start at or after the end of the fixed-size index header;
        // anything in between is skipped.
        final int headerBytes = 4 * IX_COUNT;
        final int trieStart = header[IX_STRING_TRIE_OFFSET];
        Assert.assrt(trieStart >= headerBytes);
        final int gap = trieStart - headerBytes;
        if (gap > 0) {
            ICUBinary.skipBytes(buffer, gap);
        }

        final int trieLength = header[IX_TOTAL_SIZE] - trieStart;
        switch (header[IX_TRIE_TYPE] & TRIE_TYPE_MASK) {
            case TRIE_TYPE_BYTES: {
                int transform = header[IX_TRANSFORM];
                byte[] trieBytes = new byte[trieLength];
                buffer.get(trieBytes);
                return new BytesDictionaryMatcher(trieBytes, transform);
            }
            case TRIE_TYPE_UCHARS: {
                // The trie is read as 16-bit units, so its byte length must be even.
                Assert.assrt(trieLength % 2 == 0);
                String trieChars = ICUBinary.getString(buffer, trieLength / 2, trieLength & 1);
                return new CharsDictionaryMatcher(trieChars);
            }
            default:
                // Unrecognized trie type: no matcher is produced.
                return null;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy